abrupt 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.rubocop.yml +16 -0
  4. data/.travis.yml +34 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +51 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +36 -0
  9. data/Rakefile +7 -0
  10. data/abrupt.gemspec +41 -0
  11. data/assets/rules/datatypes/cax-RequiredFormElement.ttl +34 -0
  12. data/assets/rules/datatypes/cax-readability.ttl +18 -0
  13. data/assets/rules/datatypes/cax-required.ttl +15 -0
  14. data/assets/rules/list/prp-hasState.ttl +10 -0
  15. data/assets/rules/production/non_required_form_element.ttl +24 -0
  16. data/assets/rules/production/state_has_no_html_element.ttl +21 -0
  17. data/assets/schema/schema.json +49 -0
  18. data/assets/schema/v1/complexity.json +142 -0
  19. data/assets/schema/v1/input.json +1136 -0
  20. data/assets/schema/v1/link.json +41 -0
  21. data/assets/schema/v1/picture.json +47 -0
  22. data/assets/schema/v1/readability.json +51 -0
  23. data/assets/schema/v1/subject.json +88 -0
  24. data/assets/voc/tbox.ttl +1632 -0
  25. data/bin/abrupt +63 -0
  26. data/doc/paper/listings/datatype_rule.ttl +0 -0
  27. data/doc/paper/listings/description_logic_infered.ttl +3 -0
  28. data/doc/paper/listings/description_logic_rule.ttl +15 -0
  29. data/doc/paper/listings/inconsistency_rule.ttl +0 -0
  30. data/doc/paper/listings/limitations.ttl +10 -0
  31. data/doc/paper/listings/production_rule.ttl +0 -0
  32. data/doc/paper/listings/propositional_logic_infered.ttl +6 -0
  33. data/doc/paper/listings/propositional_logic_rule.ttl +15 -0
  34. data/doc/paper/listings/unique_nested_uris.ttl +10 -0
  35. data/doc/paper/literature.bib +56 -0
  36. data/doc/paper/main.tex +322 -0
  37. data/doc/poster/Poster.key +0 -0
  38. data/doc/poster/Poster.pdf +0 -0
  39. data/doc/poster/poster.indd +0 -0
  40. data/doc/poster/resources/graph.graffle +0 -0
  41. data/doc/poster/resources/graph.png +0 -0
  42. data/doc/poster/resources/graph_crop.png +0 -0
  43. data/lib/abrupt.rb +90 -0
  44. data/lib/abrupt/converter.rb +130 -0
  45. data/lib/abrupt/crawler.rb +125 -0
  46. data/lib/abrupt/service/absolute_url.rb +32 -0
  47. data/lib/abrupt/service/base.rb +75 -0
  48. data/lib/abrupt/service/complexity.rb +27 -0
  49. data/lib/abrupt/service/input.rb +15 -0
  50. data/lib/abrupt/service/link.rb +15 -0
  51. data/lib/abrupt/service/picture.rb +19 -0
  52. data/lib/abrupt/service/readability.rb +26 -0
  53. data/lib/abrupt/service/subject.rb +19 -0
  54. data/lib/abrupt/transformation/base.rb +145 -0
  55. data/lib/abrupt/transformation/client/base.rb +8 -0
  56. data/lib/abrupt/transformation/client/page_view.rb +27 -0
  57. data/lib/abrupt/transformation/client/visit.rb +56 -0
  58. data/lib/abrupt/transformation/client/visitor.rb +19 -0
  59. data/lib/abrupt/transformation/website/base.rb +8 -0
  60. data/lib/abrupt/transformation/website/complexity.rb +20 -0
  61. data/lib/abrupt/transformation/website/input.rb +42 -0
  62. data/lib/abrupt/transformation/website/link.rb +27 -0
  63. data/lib/abrupt/transformation/website/picture.rb +26 -0
  64. data/lib/abrupt/transformation/website/readability.rb +15 -0
  65. data/lib/abrupt/transformation/website/subject.rb +22 -0
  66. data/lib/abrupt/version.rb +7 -0
  67. data/spec/cassettes/Abrupt_Crawler/outputs_correct_hash.yml +91250 -0
  68. data/spec/converter_spec.rb +34 -0
  69. data/spec/crawler_spec.rb +11 -0
  70. data/spec/factories/crawled_hashes.rb +468 -0
  71. data/spec/fixtures/rikscha-mainz.owl +17456 -0
  72. data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17.xml +51759 -0
  73. data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17_min.xml +81 -0
  74. data/spec/fixtures/rikscha_Result.xml +11594 -0
  75. data/spec/fixtures/rikscha_Result_min.xml +574 -0
  76. data/spec/spec_helper.rb +26 -0
  77. data/spec/transformation/base_spec.rb +18 -0
  78. data/spec/transformation/website/complexity_spec.rb +188 -0
  79. data/spec/transformation/website/input_spec.rb +181 -0
  80. data/spec/transformation/website/link_spec.rb +13 -0
  81. data/spec/transformation/website/picture_spec.rb +20 -0
  82. data/spec/transformation/website/readability_spec.rb +22 -0
  83. data/spec/transformation/website/subject_spec.rb +40 -0
  84. metadata +424 -0
@@ -0,0 +1,27 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Complexity service
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
6
+ class Complexity < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
9
+
10
+ def self.available_options
11
+ %w(adblock vicram vizweb color contrast ratio)
12
+ end
13
+
14
+ def service_uri
15
+ SERVICE_URI
16
+ end
17
+
18
+ def execute
19
+ super
20
+ @response['differenceMatrix']['matrix'].flatten!
21
+ @response['differenceMatrix']['palette'].flatten!
22
+ @response['contrast']['_1'] = @response['contrast'].delete '1'
23
+ @response
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,15 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Input service
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
6
+ class Input < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
9
+
10
+ def service_uri
11
+ SERVICE_URI
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Link service
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/link/'
6
+ class Link < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/link/'
9
+
10
+ def service_uri
11
+ SERVICE_URI
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Picture service
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/picture/'
6
+ class Picture < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/picture/'
9
+
10
+ def self.available_options
11
+ ['url']
12
+ end
13
+
14
+ def service_uri
15
+ SERVICE_URI
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,26 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Readability service
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'
6
+ class Readability < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'
9
+
10
+ def self.available_options
11
+ ['lang']
12
+ end
13
+
14
+ def service_uri
15
+ SERVICE_URI
16
+ end
17
+
18
+ def execute
19
+ super
20
+ superfluous_keys = %w(originalText hyphenText)
21
+ @response.delete_if { |key, _value| superfluous_keys.include?(key) }
22
+ @response
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,19 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Subject service
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/subject/'
6
+ class Subject < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/subject/'
9
+
10
+ def self.available_options
11
+ %w(lang word_limit depth)
12
+ end
13
+
14
+ def service_uri
15
+ SERVICE_URI
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,145 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ # base class
5
+ class Base
6
+ include RDF
7
+ attr_accessor :parent_uri, :uri, :values, :result, :md5
8
+
9
+ SCHEMA_MAPPING = {
10
+ integer: :to_i,
11
+ number: :to_f,
12
+ string: :to_s,
13
+ boolean: [:kind_of?, Object]
14
+ }
15
+
16
+ # Initializes Transformer for Individual Statement for parent_uri & uri.
17
+ # @param parent_uri [Array] the parent uri in array structure of paths
18
+ # @param uri [Array] the uri as array structure of path and id
19
+ # @example
20
+ # Readability.new([
21
+ # 'Website',
22
+ # 'http://www.rikscha-mainz.de',
23
+ # 'Page',
24
+ # 'http://www.rikscha-mainz.de/Angebote'
25
+ # ], [
26
+ # 'State',
27
+ # 'state54'
28
+ # ])
29
+ def initialize(parent_uri, uri, values = {})
30
+ @parent_uri = parent_uri.to_a.map(&:remove_last_slashes)
31
+ @uri = uri.to_a.map(&:remove_last_slashes)
32
+ @values = values
33
+ @result = []
34
+ end
35
+
36
+ # rubocop:disable all
37
+ def self.customize_to_schema(values)
38
+ @values = values
39
+ keyname = name.split('::').last.downcase.to_sym
40
+ schema_file = File.join Abrupt.root, 'assets', 'schema', 'v1', "#{keyname}.json"
41
+ return values unless File.exist?(schema_file)
42
+ schema = ::JSON.load(File.read(schema_file)).deep_symbolize_keys
43
+ # :button => ..., :text => {:type => "array", :items => {...}}
44
+ schema[:properties][keyname][:properties].each do |state_key, state_schema|
45
+ set_value(state_key, state_schema, [':state', ":#{keyname}"])
46
+ end
47
+ @values
48
+ end
49
+
50
+ def self.set_value(key, schema, ref)
51
+ ref << ":#{key}"
52
+ key_string = '[' + ref.join('][') + ']'
53
+ value = eval "@values#{key_string}" rescue nil
54
+ return unless value
55
+ case schema[:type]
56
+ when 'array'
57
+ case schema[:items][:type]
58
+ when 'object'
59
+ # :name => { :type => :string }
60
+ schema[:items][:properties].each do |arr_key, arr_val|
61
+ eval "@values#{key_string} = [value].flatten.compact" unless value.is_a? Array
62
+ value.each_with_index do |_obj, i|
63
+ set_value arr_key, arr_val, ref.dup + [i]
64
+ end
65
+ end
66
+ end
67
+ when 'object'
68
+ schema[:properties].each do |schema_key, schema_value|
69
+ set_value(schema_key, schema_value, ref.dup)
70
+ end
71
+ else
72
+ if value.is_a? Array
73
+ value.each_with_index do |val, i|
74
+ eval "@values#{key_string}[i] = val.send(*SCHEMA_MAPPING[schema[:type].to_sym])"
75
+ end
76
+ else
77
+ eval "@values#{key_string} = value.send(*SCHEMA_MAPPING[schema[:type].to_sym])"
78
+ end
79
+ end
80
+ end
81
+
82
+ # rubocop:enable all
83
+
84
+ def add_individuals
85
+ add_individual
86
+ return @result unless @values[keyname]
87
+ @values[keyname].each do |k, v|
88
+ s = k.to_s.eql?('language') ? "#{keyname}Language" : k
89
+ add_data_property s, v
90
+ end
91
+ @result
92
+ end
93
+
94
+ # Returns the class name
95
+ def class_name
96
+ self.class.name.split('::').last
97
+ end
98
+
99
+ # Returns the keyname
100
+ # @example
101
+ # Readability.new(parent_uri, uri).keyname
102
+ # => :readability
103
+ def keyname
104
+ class_name.downcase.to_sym
105
+ end
106
+
107
+ def resolve_parent_uri_part
108
+ "#{VOC}#{@parent_uri.join('/')}"
109
+ end
110
+
111
+ def resolve_parent_uri
112
+ RDF::URI(resolve_parent_uri_part)
113
+ end
114
+
115
+ def resolve_uri_part(name)
116
+ if @uri.empty?
117
+ "#{class_name}/#{name}"
118
+ else
119
+ "#{@uri.join('/')}"
120
+ end
121
+ end
122
+
123
+ def resolve_uri(name = nil)
124
+ name ||= @uri.last
125
+ RDF::URI(resolve_parent_uri_part + '/' + resolve_uri_part(name))
126
+ end
127
+
128
+ def add_individual(name = @values[:name], klass = nil)
129
+ klass ||= @uri.empty? ? class_name : @uri.first
130
+ uri = resolve_uri(name)
131
+ @result << Statement.new(uri, RDF.type, VOC[klass])
132
+ @result << Statement.new(resolve_parent_uri, VOC["has#{klass}"], uri)
133
+ end
134
+
135
+ def add_data_property(type, value, name = @values[:name])
136
+ @result << Statement.new(resolve_uri(name), VOC[type], value)
137
+ end
138
+
139
+ def add_object_property(parent_uri, type, child_uri)
140
+ parent_uri = RDF::URI(parent_uri) if parent_uri.is_a?(String)
141
+ @result << Statement.new(parent_uri, VOC["has#{type}"], child_uri)
142
+ end
143
+ end
144
+ end
145
+ end
@@ -0,0 +1,8 @@
1
+ module Abrupt
2
+ module Transformation
3
+ module Client
4
+ class Base < Transformation::Base
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,27 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Client
5
+ # Transformation class for client page view data
6
+ class PageView < Transformation::Base
7
+ def add_individuals
8
+ datetime = @values['datetime']
9
+ return @result unless datetime
10
+ @values[:name] = ::Abrupt.format_time(datetime)
11
+ super
12
+ @values.each do |_i, attr|
13
+ next if attr.is_a?(String)
14
+ name = attr.name.eql?('name') ? 'inputname' : attr.name
15
+ value = if name.eql?('datetime')
16
+ Abrupt.parse_time(attr.value)
17
+ else
18
+ CGI.escape(attr.value)
19
+ end
20
+ add_data_property(name, value)
21
+ end
22
+ @result
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,56 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Client
5
+ # Transformation class for client visit data
6
+ class Visit < Base
7
+ def add_individuals
8
+ @values[:name] = @uri.last
9
+ super
10
+ @values.each do |key, value|
11
+ add_property(key, value) if value
12
+ end
13
+ add_individuals_for_view
14
+ @result
15
+ end
16
+
17
+ def add_property(key, value)
18
+ enumerable = value.is_a?(Hash) || value.is_a?(Array)
19
+ return if enumerable # value.is_a?(Enumerable)
20
+ name = case key
21
+ when 'uri'
22
+ uri = [@parent_uri[1], value].map(&:remove_last_slashes)
23
+ parent_uri_path = (@parent_uri[0..-3] + ['Page', uri.join])
24
+ parent_uri = "#{VOC}#{parent_uri_path.join('/')}"
25
+ # Page hasPageVisit visit
26
+ add_object_property(parent_uri, 'PageVisit', resolve_uri)
27
+ key
28
+ when 'size' # TODO: transform via customize_to_schema
29
+ 'contentlength'
30
+ else
31
+ key
32
+ end
33
+ add_data_property(name, CGI.escape(value))
34
+ end
35
+
36
+ def add_individuals_for_view
37
+ page_views = @values[:view]
38
+ return unless page_views
39
+ page_views.each do |type, view|
40
+ [view].flatten.each do |attributes|
41
+ add_page_view(type.to_s.capitalize, attributes)
42
+ end
43
+ end
44
+ end
45
+
46
+ def add_page_view(type, attributes)
47
+ time = ::Abrupt.format_time(attributes[:datetime])
48
+ page_view = PageView.new(@parent_uri + @uri,
49
+ [type, time],
50
+ attributes)
51
+ @result += page_view.add_individuals
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,19 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Client
5
+ # Transformation class for client visit data
6
+ class Visitor < Transformation::Base
7
+ def add_individuals
8
+ return @result unless @values
9
+ @values[:name] = @values[:ip]
10
+ super
11
+ @values.each do |key, value|
12
+ add_data_property(key, value) if value.is_a?(String)
13
+ end
14
+ @result
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,8 @@
1
+ module Abrupt
2
+ module Transformation
3
+ module Website
4
+ class Base < Transformation::Base
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,20 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Website
5
+ # Complexity service
6
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
7
+ class Complexity < Base
8
+ def add_individuals
9
+ @uri = @parent_uri.slice!(-2, 2)
10
+ return @result unless @values[keyname]
11
+ # flatten vicram complexity
12
+ @values[keyname][:vicramComplexity] =
13
+ @values[keyname][:vicram].delete(:complexity)
14
+ @values[keyname].delete :vicram
15
+ super
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,42 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Website
5
+ # Input service
6
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
7
+ # schema located in {PROJECT_ROOT}/assets/schema/v1/input.json
8
+ class Input < Base
9
+ attr_accessor :form_uri
10
+
11
+ def add_individuals
12
+ return @result unless @values[keyname]
13
+ form_id = Digest::MD5.hexdigest(@values[keyname].to_s)
14
+ @uri = ['Form', form_id]
15
+ add_individual
16
+ @parent_uri += @uri
17
+ @values[keyname].each do |input_type, inputs|
18
+ add_individuals_for_inputs(inputs, input_type)
19
+ end
20
+ @result
21
+ end
22
+
23
+ def add_individuals_for_inputs(inputs, input_type)
24
+ [inputs].flatten.compact.each do |input|
25
+ form_element_id = input[:id] || Digest::MD5.hexdigest(input.to_s)
26
+ @uri = [input_type.to_s.camelcase, form_element_id]
27
+ add_individual
28
+ add_data_properties input
29
+ end
30
+ end
31
+
32
+ def add_data_properties(input)
33
+ input.each do |type, value|
34
+ next unless type && value
35
+ v = value.is_a?(String) ? CGI.escapeHTML(value) : value
36
+ add_data_property type, v
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end