abrupt 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.rubocop.yml +16 -0
  4. data/.travis.yml +34 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +51 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +36 -0
  9. data/Rakefile +7 -0
  10. data/abrupt.gemspec +41 -0
  11. data/assets/rules/datatypes/cax-RequiredFormElement.ttl +34 -0
  12. data/assets/rules/datatypes/cax-readability.ttl +18 -0
  13. data/assets/rules/datatypes/cax-required.ttl +15 -0
  14. data/assets/rules/list/prp-hasState.ttl +10 -0
  15. data/assets/rules/production/non_required_form_element.ttl +24 -0
  16. data/assets/rules/production/state_has_no_html_element.ttl +21 -0
  17. data/assets/schema/schema.json +49 -0
  18. data/assets/schema/v1/complexity.json +142 -0
  19. data/assets/schema/v1/input.json +1136 -0
  20. data/assets/schema/v1/link.json +41 -0
  21. data/assets/schema/v1/picture.json +47 -0
  22. data/assets/schema/v1/readability.json +51 -0
  23. data/assets/schema/v1/subject.json +88 -0
  24. data/assets/voc/tbox.ttl +1632 -0
  25. data/bin/abrupt +63 -0
  26. data/doc/paper/listings/datatype_rule.ttl +0 -0
  27. data/doc/paper/listings/description_logic_infered.ttl +3 -0
  28. data/doc/paper/listings/description_logic_rule.ttl +15 -0
  29. data/doc/paper/listings/inconsistency_rule.ttl +0 -0
  30. data/doc/paper/listings/limitations.ttl +10 -0
  31. data/doc/paper/listings/production_rule.ttl +0 -0
  32. data/doc/paper/listings/propositional_logic_infered.ttl +6 -0
  33. data/doc/paper/listings/propositional_logic_rule.ttl +15 -0
  34. data/doc/paper/listings/unique_nested_uris.ttl +10 -0
  35. data/doc/paper/literature.bib +56 -0
  36. data/doc/paper/main.tex +322 -0
  37. data/doc/poster/Poster.key +0 -0
  38. data/doc/poster/Poster.pdf +0 -0
  39. data/doc/poster/poster.indd +0 -0
  40. data/doc/poster/resources/graph.graffle +0 -0
  41. data/doc/poster/resources/graph.png +0 -0
  42. data/doc/poster/resources/graph_crop.png +0 -0
  43. data/lib/abrupt.rb +90 -0
  44. data/lib/abrupt/converter.rb +130 -0
  45. data/lib/abrupt/crawler.rb +125 -0
  46. data/lib/abrupt/service/absolute_url.rb +32 -0
  47. data/lib/abrupt/service/base.rb +75 -0
  48. data/lib/abrupt/service/complexity.rb +27 -0
  49. data/lib/abrupt/service/input.rb +15 -0
  50. data/lib/abrupt/service/link.rb +15 -0
  51. data/lib/abrupt/service/picture.rb +19 -0
  52. data/lib/abrupt/service/readability.rb +26 -0
  53. data/lib/abrupt/service/subject.rb +19 -0
  54. data/lib/abrupt/transformation/base.rb +145 -0
  55. data/lib/abrupt/transformation/client/base.rb +8 -0
  56. data/lib/abrupt/transformation/client/page_view.rb +27 -0
  57. data/lib/abrupt/transformation/client/visit.rb +56 -0
  58. data/lib/abrupt/transformation/client/visitor.rb +19 -0
  59. data/lib/abrupt/transformation/website/base.rb +8 -0
  60. data/lib/abrupt/transformation/website/complexity.rb +20 -0
  61. data/lib/abrupt/transformation/website/input.rb +42 -0
  62. data/lib/abrupt/transformation/website/link.rb +27 -0
  63. data/lib/abrupt/transformation/website/picture.rb +26 -0
  64. data/lib/abrupt/transformation/website/readability.rb +15 -0
  65. data/lib/abrupt/transformation/website/subject.rb +22 -0
  66. data/lib/abrupt/version.rb +7 -0
  67. data/spec/cassettes/Abrupt_Crawler/outputs_correct_hash.yml +91250 -0
  68. data/spec/converter_spec.rb +34 -0
  69. data/spec/crawler_spec.rb +11 -0
  70. data/spec/factories/crawled_hashes.rb +468 -0
  71. data/spec/fixtures/rikscha-mainz.owl +17456 -0
  72. data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17.xml +51759 -0
  73. data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17_min.xml +81 -0
  74. data/spec/fixtures/rikscha_Result.xml +11594 -0
  75. data/spec/fixtures/rikscha_Result_min.xml +574 -0
  76. data/spec/spec_helper.rb +26 -0
  77. data/spec/transformation/base_spec.rb +18 -0
  78. data/spec/transformation/website/complexity_spec.rb +188 -0
  79. data/spec/transformation/website/input_spec.rb +181 -0
  80. data/spec/transformation/website/link_spec.rb +13 -0
  81. data/spec/transformation/website/picture_spec.rb +20 -0
  82. data/spec/transformation/website/readability_spec.rb +22 -0
  83. data/spec/transformation/website/subject_spec.rb +40 -0
  84. metadata +424 -0
@@ -0,0 +1,27 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Complexity service; #execute post-processes the response before returning it.
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
6
+ class Complexity < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
9
+
10
+ def self.available_options # option names declared for this service
11
+ %w(adblock vicram vizweb color contrast ratio)
12
+ end
13
+
14
+ def service_uri # endpoint of this service
15
+ SERVICE_URI
16
+ end
17
+
18
+ def execute # returns @response after normalising it
19
+ super # NOTE(review): presumably Base#execute populates @response - confirm in service/base.rb
20
+ @response['differenceMatrix']['matrix'].flatten! # flatten! mutates the stored value in place
21
+ @response['differenceMatrix']['palette'].flatten! # flatten! mutates the stored value in place
22
+ @response['contrast']['_1'] = @response['contrast'].delete '1' # move the value stored under '1' to '_1'
23
+ @response
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,15 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Input service
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
6
+ class Input < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
9
+
10
+ def service_uri # endpoint of this service
11
+ SERVICE_URI
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Link service
5
+ # documentation: presumably at SERVICE_URI below (this comment used to point at the readability service) - TODO confirm
6
+ class Link < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/link/'
9
+
10
+ def service_uri # endpoint of this service
11
+ SERVICE_URI
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Picture service
5
+ # documentation: presumably at SERVICE_URI below (this comment used to point at the readability service) - TODO confirm
6
+ class Picture < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/picture/'
9
+
10
+ def self.available_options # option names declared for this service
11
+ ['url']
12
+ end
13
+
14
+ def service_uri # endpoint of this service
15
+ SERVICE_URI
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,26 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Readability service; #execute drops the text echo keys from the response.
5
+ # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'
6
+ class Readability < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'
9
+
10
+ def self.available_options # option names declared for this service
11
+ ['lang']
12
+ end
13
+
14
+ def service_uri # endpoint of this service
15
+ SERVICE_URI
16
+ end
17
+
18
+ def execute # returns @response without the superfluous keys
19
+ super # NOTE(review): presumably Base#execute populates @response - confirm in service/base.rb
20
+ superfluous_keys = %w(originalText hyphenText)
21
+ @response.delete_if { |key, _value| superfluous_keys.include?(key) } # removes top-level entries only, in place
22
+ @response
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,19 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Service
4
+ # Subject service
5
+ # documentation: presumably at SERVICE_URI below (this comment used to point at the readability service) - TODO confirm
6
+ class Subject < Base
7
+ # TODO: outsource service uri to module Service
8
+ SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/subject/'
9
+
10
+ def self.available_options # option names declared for this service
11
+ %w(lang word_limit depth)
12
+ end
13
+
14
+ def service_uri # endpoint of this service
15
+ SERVICE_URI
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,145 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ # Base class for transformations: collects RDF statements for one individual in @result.
5
+ class Base
6
+ include RDF
7
+ attr_accessor :parent_uri, :uri, :values, :result, :md5
8
+
9
+ SCHEMA_MAPPING = { # schema type => arguments passed to Object#send to coerce a value (see set_value)
10
+ integer: :to_i,
11
+ number: :to_f,
12
+ string: :to_s,
13
+ boolean: [:kind_of?, Object] # NOTE(review): kind_of?(Object) is true for any ordinary object, so every present value becomes true - confirm intended
14
+ }
15
+
16
+ # Initializes Transformer for Individual Statement for parent_uri & uri.
17
+ # @param parent_uri [Array] the parent uri in array structure of paths
18
+ # @param uri [Array] the uri as array structure of path and id
19
+ # @example
20
+ # Readability.new([
21
+ # 'Website',
22
+ # 'http://www.rikscha-mainz.de',
23
+ # 'Page',
24
+ # 'http://www.rikscha-mainz.de/Angebote'
25
+ # ], [
26
+ # 'State',
27
+ # 'state54'
28
+ # ])
29
+ def initialize(parent_uri, uri, values = {}) # values is stored as-is in @values; @result starts empty
30
+ @parent_uri = parent_uri.to_a.map(&:remove_last_slashes) # remove_last_slashes is defined outside this file (presumably a String extension)
31
+ @uri = uri.to_a.map(&:remove_last_slashes)
32
+ @values = values
33
+ @result = []
34
+ end
35
+
36
+ # rubocop:disable all
37
+ def self.customize_to_schema(values) # coerces entries of values according to assets/schema/v1/<keyname>.json and returns them
38
+ @values = values # NOTE: class-level instance variable, read and written by set_value
39
+ keyname = name.split('::').last.downcase.to_sym # last constant segment, e.g. :input for ...::Input
40
+ schema_file = File.join Abrupt.root, 'assets', 'schema', 'v1', "#{keyname}.json"
41
+ return values unless File.exist?(schema_file) # no schema file for this class: values are returned untouched
42
+ schema = ::JSON.load(File.read(schema_file)).deep_symbolize_keys # deep_symbolize_keys is not core Ruby (presumably ActiveSupport)
43
+ # schema entries look like :button => ..., :text => {:type => "array", :items => {...}}
44
+ schema[:properties][keyname][:properties].each do |state_key, state_schema|
45
+ set_value(state_key, state_schema, [':state', ":#{keyname}"]) # path segments are source text for the evals in set_value
46
+ end
47
+ @values
48
+ end
49
+
50
+ def self.set_value(key, schema, ref) # coerces @values at path ref + key to the type named by schema[:type], recursing into arrays/objects
51
+ ref << ":#{key}" # NOTE: appends to the array object passed in by the caller
52
+ key_string = '[' + ref.join('][') + ']' # e.g. "[:state][:input][:text]"
53
+ value = eval "@values#{key_string}" rescue nil # nested lookup; any StandardError (e.g. a missing intermediate key) yields nil
54
+ return unless value # nil and false are left as they are
55
+ case schema[:type]
56
+ when 'array'
57
+ case schema[:items][:type] # only arrays of objects are descended into; other item types are left unchanged
58
+ when 'object'
59
+ # item properties look like :name => { :type => :string }
60
+ schema[:items][:properties].each do |arr_key, arr_val|
61
+ eval "@values#{key_string} = [value].flatten.compact" unless value.is_a? Array # wraps a single value in @values; NOTE(review): the local `value` is not updated, so the loop below still iterates the original object
62
+ value.each_with_index do |_obj, i|
63
+ set_value arr_key, arr_val, ref.dup + [i] # fresh path array per element
64
+ end
65
+ end
66
+ end
67
+ when 'object'
68
+ schema[:properties].each do |schema_key, schema_value|
69
+ set_value(schema_key, schema_value, ref.dup) # dup so sibling keys do not share one path array
70
+ end
71
+ else
72
+ if value.is_a? Array
73
+ value.each_with_index do |val, i|
74
+ eval "@values#{key_string}[i] = val.send(*SCHEMA_MAPPING[schema[:type].to_sym])" # NOTE(review): no fallback for schema types missing from SCHEMA_MAPPING
75
+ end
76
+ else
77
+ eval "@values#{key_string} = value.send(*SCHEMA_MAPPING[schema[:type].to_sym])" # NOTE(review): no fallback for schema types missing from SCHEMA_MAPPING
78
+ end
79
+ end
80
+ end
81
+
82
+ # rubocop:enable all
83
+
84
+ def add_individuals # adds the individual plus one data property per entry of @values[keyname]; returns @result
85
+ add_individual
86
+ return @result unless @values[keyname]
87
+ @values[keyname].each do |k, v|
88
+ s = k.to_s.eql?('language') ? "#{keyname}Language" : k # 'language' is renamed to "<keyname>Language"
89
+ add_data_property s, v
90
+ end
91
+ @result
92
+ end
93
+
94
+ # Returns the class name without its namespace
95
+ def class_name
96
+ self.class.name.split('::').last
97
+ end
98
+
99
+ # Returns the keyname (downcased class name as a symbol)
100
+ # @example:
101
+ # Readability.new(parent_uri, uri).keyname
102
+ # => :readability
103
+ def keyname
104
+ class_name.downcase.to_sym
105
+ end
106
+
107
+ def resolve_parent_uri_part # VOC followed by the parent path segments joined with '/'
108
+ "#{VOC}#{@parent_uri.join('/')}"
109
+ end
110
+
111
+ def resolve_parent_uri # parent path as an RDF::URI
112
+ RDF::URI(resolve_parent_uri_part)
113
+ end
114
+
115
+ def resolve_uri_part(name) # name is only used when @uri is empty
116
+ if @uri.empty?
117
+ "#{class_name}/#{name}"
118
+ else
119
+ "#{@uri.join('/')}"
120
+ end
121
+ end
122
+
123
+ def resolve_uri(name = nil) # full uri of this individual: parent part + '/' + own part
124
+ name ||= @uri.last
125
+ RDF::URI(resolve_parent_uri_part + '/' + resolve_uri_part(name))
126
+ end
127
+
128
+ def add_individual(name = @values[:name], klass = nil) # adds a type statement and a parent has<klass> statement
129
+ klass ||= @uri.empty? ? class_name : @uri.first
130
+ uri = resolve_uri(name)
131
+ @result << Statement.new(uri, RDF.type, VOC[klass])
132
+ @result << Statement.new(resolve_parent_uri, VOC["has#{klass}"], uri)
133
+ end
134
+
135
+ def add_data_property(type, value, name = @values[:name]) # adds (individual uri, VOC[type], value)
136
+ @result << Statement.new(resolve_uri(name), VOC[type], value)
137
+ end
138
+
139
+ def add_object_property(parent_uri, type, child_uri) # adds (parent_uri, VOC["has<type>"], child_uri)
140
+ parent_uri = RDF::URI(parent_uri) if parent_uri.is_a?(String)
141
+ @result << Statement.new(parent_uri, VOC["has#{type}"], child_uri)
142
+ end
143
+ end
144
+ end
145
+ end
@@ -0,0 +1,8 @@
1
+ module Abrupt
2
+ module Transformation
3
+ module Client
4
+ class Base < Transformation::Base # base class inside the Client namespace; adds nothing to Transformation::Base
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,27 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Client
5
+ # Transformation class for client page view data
6
+ class PageView < Transformation::Base
7
+ def add_individuals # returns @result; adds nothing when there is no 'datetime' entry
8
+ datetime = @values['datetime'] # NOTE(review): string key here, while Visit#add_page_view reads attributes[:datetime] - confirm the container accepts both
9
+ return @result unless datetime
10
+ @values[:name] = ::Abrupt.format_time(datetime) # the formatted time is the individual's name
11
+ super
12
+ @values.each do |_i, attr|
13
+ next if attr.is_a?(String) # plain strings (such as the :name entry set above) are skipped
14
+ name = attr.name.eql?('name') ? 'inputname' : attr.name # attr must respond to #name/#value - presumably an XML attribute node, TODO confirm
15
+ value = if name.eql?('datetime')
16
+ Abrupt.parse_time(attr.value)
17
+ else
18
+ CGI.escape(attr.value)
19
+ end
20
+ add_data_property(name, value)
21
+ end
22
+ @result
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,56 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Client
5
+ # Transformation class for client visit data
6
+ class Visit < Base
7
+ def add_individuals # adds the visit, its scalar properties and its page views; returns @result
8
+ @values[:name] = @uri.last # the visit is named after the last uri segment
9
+ super
10
+ @values.each do |key, value|
11
+ add_property(key, value) if value
12
+ end
13
+ add_individuals_for_view
14
+ @result
15
+ end
16
+
17
+ def add_property(key, value) # adds one data property; 'uri' additionally links the page to this visit
18
+ enumerable = value.is_a?(Hash) || value.is_a?(Array)
19
+ return if enumerable # hashes and arrays are skipped here; was: value.is_a?(Enumerable)
20
+ name = case key
21
+ when 'uri'
22
+ uri = [@parent_uri[1], value].map(&:remove_last_slashes)
23
+ parent_uri_path = (@parent_uri[0..-3] + ['Page', uri.join]) # replaces the last two parent segments with 'Page' and the page uri
24
+ parent_uri = "#{VOC}#{parent_uri_path.join('/')}"
25
+ # Page hasPageVisit visit
26
+ add_object_property(parent_uri, 'PageVisit', resolve_uri)
27
+ key
28
+ when 'size' # TODO: transform via customize_to_schema
29
+ 'contentlength'
30
+ else
31
+ key
32
+ end
33
+ add_data_property(name, CGI.escape(value)) # NOTE(review): CGI.escape expects a String - confirm all scalar values are strings
34
+ end
35
+
36
+ def add_individuals_for_view # adds one PageView per entry under @values[:view]
37
+ page_views = @values[:view]
38
+ return unless page_views
39
+ page_views.each do |type, view|
40
+ [view].flatten.each do |attributes| # accepts a single view or an array of views
41
+ add_page_view(type.to_s.capitalize, attributes)
42
+ end
43
+ end
44
+ end
45
+
46
+ def add_page_view(type, attributes) # appends the PageView's statements to @result
47
+ time = ::Abrupt.format_time(attributes[:datetime])
48
+ page_view = PageView.new(@parent_uri + @uri,
49
+ [type, time],
50
+ attributes)
51
+ @result += page_view.add_individuals
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,19 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Client
5
+ # Transformation class for client visitor data
6
+ class Visitor < Transformation::Base
7
+ def add_individuals # returns @result; adds nothing when @values is nil
8
+ return @result unless @values
9
+ @values[:name] = @values[:ip] # the visitor is named after its :ip entry
10
+ super
11
+ @values.each do |key, value|
12
+ add_data_property(key, value) if value.is_a?(String) # only string values become data properties
13
+ end
14
+ @result
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,8 @@
1
+ module Abrupt
2
+ module Transformation
3
+ module Website
4
+ class Base < Transformation::Base # base class inside the Website namespace; adds nothing to Transformation::Base
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,20 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Website
5
+ # Transformation class for Complexity service results
6
+ # service documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
7
+ class Complexity < Base
8
+ def add_individuals # returns @result; adds nothing when there is no complexity data
9
+ @uri = @parent_uri.slice!(-2, 2) # last two parent segments become this individual's uri; NOTE: they are removed from @parent_uri even if the guard below returns
10
+ return @result unless @values[keyname]
11
+ # flatten vicram complexity: move [:vicram][:complexity] to [:vicramComplexity] and drop :vicram
12
+ @values[keyname][:vicramComplexity] =
13
+ @values[keyname][:vicram].delete(:complexity) # NOTE(review): raises NoMethodError when :vicram is absent
14
+ @values[keyname].delete :vicram
15
+ super
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,42 @@
1
+ # @author Manuel Dudda
2
+ module Abrupt
3
+ module Transformation
4
+ module Website
5
+ # Transformation class for Input service results: one Form individual with one individual per form element
6
+ # service documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
7
+ # schema located in {PROJECT_ROOT}/assets/schema/v1/input.json
8
+ class Input < Base
9
+ attr_accessor :form_uri
10
+
11
+ def add_individuals # returns @result; adds nothing when there is no input data
12
+ return @result unless @values[keyname]
13
+ form_id = Digest::MD5.hexdigest(@values[keyname].to_s) # form id is the MD5 of the input data's string form
14
+ @uri = ['Form', form_id]
15
+ add_individual
16
+ @parent_uri += @uri # form elements added below use the form as their parent
17
+ @values[keyname].each do |input_type, inputs|
18
+ add_individuals_for_inputs(inputs, input_type)
19
+ end
20
+ @result
21
+ end
22
+
23
+ def add_individuals_for_inputs(inputs, input_type) # accepts a single input or an array; nil entries are dropped
24
+ [inputs].flatten.compact.each do |input|
25
+ form_element_id = input[:id] || Digest::MD5.hexdigest(input.to_s) # falls back to an MD5 of the element when it has no :id
26
+ @uri = [input_type.to_s.camelcase, form_element_id] # camelcase is not core Ruby (presumably ActiveSupport)
27
+ add_individual
28
+ add_data_properties input
29
+ end
30
+ end
31
+
32
+ def add_data_properties(input) # one data property per attribute; strings are HTML-escaped
33
+ input.each do |type, value|
34
+ next unless type && value # entries with nil/false key or value are skipped
35
+ v = value.is_a?(String) ? CGI.escapeHTML(value) : value
36
+ add_data_property type, v
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end