abrupt 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rubocop.yml +16 -0
- data/.travis.yml +34 -0
- data/Gemfile +4 -0
- data/Guardfile +51 -0
- data/LICENSE.txt +22 -0
- data/README.md +36 -0
- data/Rakefile +7 -0
- data/abrupt.gemspec +41 -0
- data/assets/rules/datatypes/cax-RequiredFormElement.ttl +34 -0
- data/assets/rules/datatypes/cax-readability.ttl +18 -0
- data/assets/rules/datatypes/cax-required.ttl +15 -0
- data/assets/rules/list/prp-hasState.ttl +10 -0
- data/assets/rules/production/non_required_form_element.ttl +24 -0
- data/assets/rules/production/state_has_no_html_element.ttl +21 -0
- data/assets/schema/schema.json +49 -0
- data/assets/schema/v1/complexity.json +142 -0
- data/assets/schema/v1/input.json +1136 -0
- data/assets/schema/v1/link.json +41 -0
- data/assets/schema/v1/picture.json +47 -0
- data/assets/schema/v1/readability.json +51 -0
- data/assets/schema/v1/subject.json +88 -0
- data/assets/voc/tbox.ttl +1632 -0
- data/bin/abrupt +63 -0
- data/doc/paper/listings/datatype_rule.ttl +0 -0
- data/doc/paper/listings/description_logic_infered.ttl +3 -0
- data/doc/paper/listings/description_logic_rule.ttl +15 -0
- data/doc/paper/listings/inconsistency_rule.ttl +0 -0
- data/doc/paper/listings/limitations.ttl +10 -0
- data/doc/paper/listings/production_rule.ttl +0 -0
- data/doc/paper/listings/propositional_logic_infered.ttl +6 -0
- data/doc/paper/listings/propositional_logic_rule.ttl +15 -0
- data/doc/paper/listings/unique_nested_uris.ttl +10 -0
- data/doc/paper/literature.bib +56 -0
- data/doc/paper/main.tex +322 -0
- data/doc/poster/Poster.key +0 -0
- data/doc/poster/Poster.pdf +0 -0
- data/doc/poster/poster.indd +0 -0
- data/doc/poster/resources/graph.graffle +0 -0
- data/doc/poster/resources/graph.png +0 -0
- data/doc/poster/resources/graph_crop.png +0 -0
- data/lib/abrupt.rb +90 -0
- data/lib/abrupt/converter.rb +130 -0
- data/lib/abrupt/crawler.rb +125 -0
- data/lib/abrupt/service/absolute_url.rb +32 -0
- data/lib/abrupt/service/base.rb +75 -0
- data/lib/abrupt/service/complexity.rb +27 -0
- data/lib/abrupt/service/input.rb +15 -0
- data/lib/abrupt/service/link.rb +15 -0
- data/lib/abrupt/service/picture.rb +19 -0
- data/lib/abrupt/service/readability.rb +26 -0
- data/lib/abrupt/service/subject.rb +19 -0
- data/lib/abrupt/transformation/base.rb +145 -0
- data/lib/abrupt/transformation/client/base.rb +8 -0
- data/lib/abrupt/transformation/client/page_view.rb +27 -0
- data/lib/abrupt/transformation/client/visit.rb +56 -0
- data/lib/abrupt/transformation/client/visitor.rb +19 -0
- data/lib/abrupt/transformation/website/base.rb +8 -0
- data/lib/abrupt/transformation/website/complexity.rb +20 -0
- data/lib/abrupt/transformation/website/input.rb +42 -0
- data/lib/abrupt/transformation/website/link.rb +27 -0
- data/lib/abrupt/transformation/website/picture.rb +26 -0
- data/lib/abrupt/transformation/website/readability.rb +15 -0
- data/lib/abrupt/transformation/website/subject.rb +22 -0
- data/lib/abrupt/version.rb +7 -0
- data/spec/cassettes/Abrupt_Crawler/outputs_correct_hash.yml +91250 -0
- data/spec/converter_spec.rb +34 -0
- data/spec/crawler_spec.rb +11 -0
- data/spec/factories/crawled_hashes.rb +468 -0
- data/spec/fixtures/rikscha-mainz.owl +17456 -0
- data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17.xml +51759 -0
- data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17_min.xml +81 -0
- data/spec/fixtures/rikscha_Result.xml +11594 -0
- data/spec/fixtures/rikscha_Result_min.xml +574 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/transformation/base_spec.rb +18 -0
- data/spec/transformation/website/complexity_spec.rb +188 -0
- data/spec/transformation/website/input_spec.rb +181 -0
- data/spec/transformation/website/link_spec.rb +13 -0
- data/spec/transformation/website/picture_spec.rb +20 -0
- data/spec/transformation/website/readability_spec.rb +22 -0
- data/spec/transformation/website/subject_spec.rb +40 -0
- metadata +424 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Client class for the complexity service.
    # API documentation:
    # 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
    class Complexity < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'

      # Names of the options declared for this service.
      # @return [Array<String>] a fresh array on every call
      def self.available_options
        ['adblock', 'vicram', 'vizweb', 'color', 'contrast', 'ratio']
      end

      # @return [String] the endpoint URI of the complexity service
      def service_uri
        SERVICE_URI
      end

      # Calls the inherited +execute+ and then reshapes @response in place:
      # the 'matrix' and 'palette' arrays below 'differenceMatrix' are
      # flattened and the contrast entry stored under '1' is moved to '_1'.
      # NOTE(review): presumably the inherited +execute+ fills @response with
      # the parsed service answer; Base is not visible here -- confirm.
      # @return [Object] the reshaped @response
      def execute
        super
        difference_matrix = @response['differenceMatrix']
        difference_matrix['matrix'].flatten!
        difference_matrix['palette'].flatten!
        contrast = @response['contrast']
        contrast['_1'] = contrast.delete('1')
        @response
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Input service
    # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
    class Input < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'

      # @return [String] the endpoint URI of the input service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Link service
    # NOTE(review): the location of the service documentation is not
    # confirmed; presumably it is served below SERVICE_URI.
    class Link < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/link/'

      # @return [String] the endpoint URI of the link service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Picture service
    # NOTE(review): the location of the service documentation is not
    # confirmed; presumably it is served below SERVICE_URI.
    class Picture < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/picture/'

      # Names of the options declared for this service.
      # @return [Array<String>] a fresh array on every call
      def self.available_options
        ['url']
      end

      # @return [String] the endpoint URI of the picture service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Client class for the readability service.
    # Service documentation: 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'
    class Readability < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'

      # Names of the options declared for this service.
      # @return [Array<String>] a fresh array on every call
      def self.available_options
        %w(lang)
      end

      # @return [String] the endpoint URI of the readability service
      def service_uri
        SERVICE_URI
      end

      # Calls the inherited +execute+ and afterwards drops the entries
      # 'originalText' and 'hyphenText' from @response.
      # NOTE(review): presumably the inherited +execute+ fills @response with
      # the parsed service answer; Base is not visible here -- confirm.
      # @return [Object] the pruned @response
      def execute
        super
        unwanted = ['originalText', 'hyphenText']
        @response.delete_if { |name, _| unwanted.include?(name) }
        @response
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Subject service
    # NOTE(review): the location of the service documentation is not
    # confirmed; presumably it is served below SERVICE_URI.
    class Subject < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/subject/'

      # Names of the options declared for this service.
      # @return [Array<String>] a fresh array on every call
      def self.available_options
        %w(lang word_limit depth)
      end

      # @return [String] the endpoint URI of the subject service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    # Base class of the transformations. An instance collects RDF statements
    # in @result; the class methods coerce a values hash to the types
    # declared in the JSON schema belonging to the concrete subclass.
    class Base
      include RDF
      attr_accessor :parent_uri, :uri, :values, :result, :md5

      # Maps a schema type to the arguments handed to +send+ in order to
      # coerce a value. 'boolean' uses kind_of?(Object), which is true for
      # every value that reaches the coercion (falsy values are skipped).
      SCHEMA_MAPPING = {
        integer: :to_i,
        number: :to_f,
        string: :to_s,
        boolean: [:kind_of?, Object]
      }.freeze

      # Initializes Transformer for Individual Statement for parent_uri & uri.
      # Every element of both arrays is passed through remove_last_slashes.
      # @param parent_uri [Array] the parent uri in array structure of paths
      # @param uri [Array] the uri as array structure of path and id
      # @param values [Hash] the values the statements are built from
      # @example
      #   Readability.new([
      #     'Website',
      #     'http://www.rikscha-mainz.de',
      #     'Page',
      #     'http://www.rikscha-mainz.de/Angebote'
      #   ], [
      #     'State',
      #     'state54'
      #   ])
      def initialize(parent_uri, uri, values = {})
        @parent_uri = parent_uri.to_a.map(&:remove_last_slashes)
        @uri = uri.to_a.map(&:remove_last_slashes)
        @values = values
        @result = []
      end

      # rubocop:disable all

      # Coerces values[:state][<keyname>] in place to the types declared in
      # assets/schema/v1/<keyname>.json, where <keyname> is the downcased
      # class name. Returns the values untouched when no such schema exists.
      # @param values [Hash] crawled values, modified in place
      # @return [Hash] the (possibly coerced) values
      def self.customize_to_schema(values)
        @values = values
        keyname = name.split('::').last.downcase.to_sym
        schema_file = File.join Abrupt.root, 'assets', 'schema', 'v1', "#{keyname}.json"
        return values unless File.exist?(schema_file)
        schema = ::JSON.load(File.read(schema_file)).deep_symbolize_keys
        # :button => ..., :text => {:type => "array", :items => {...}}
        schema[:properties][keyname][:properties].each do |state_key, state_schema|
          set_value(state_key, state_schema, [':state', ":#{keyname}"])
        end
        @values
      end

      # Coerces the value found below +ref+ + +key+ in the class level
      # @values according to +schema+ (recursively for objects and for
      # arrays of objects). Nothing happens when the value is missing or
      # falsy, or when the schema type has no entry in SCHEMA_MAPPING.
      # The lookup walks the hash directly instead of evaluating generated
      # source code, so keys that are not valid bare symbol literals
      # (e.g. hyphenated names) are handled as well.
      # @param key [Symbol, String] key of the value below +ref+
      # @param schema [Hash] symbolized JSON schema describing the value
      # @param ref [Array] path to the parent; strings with a leading colon
      #   (':state') denote symbol keys, integers denote array indexes.
      #   The step for +key+ is appended to this array.
      def self.set_value(key, schema, ref)
        # Appending to the caller's array is kept for backward compatibility.
        ref << ":#{key}"
        coerce_at(ref.map { |step| path_step(step) }, schema)
      end

      # Translates one +ref+ element into the key used for the hash lookup.
      def self.path_step(step)
        step.is_a?(String) && step.start_with?(':') ? step[1..-1].to_sym : step
      end

      # Reads the value stored below +path+; nil when the path cannot be
      # followed (missing key, nil or non-indexable intermediate value).
      def self.read_path(path)
        path.reduce(@values) { |node, step| node[step] }
      rescue StandardError
        nil
      end

      # Stores +value+ below +path+ and returns it.
      def self.write_path(path, value)
        container = path[0...-1].reduce(@values) { |node, step| node[step] }
        container[path.last] = value
      end

      # Dispatches on the schema type of the value stored below +path+.
      def self.coerce_at(path, schema)
        value = read_path(path)
        return unless value
        case schema[:type]
        when 'array'
          coerce_object_items(path, schema[:items], value)
        when 'object'
          (schema[:properties] || {}).each do |property, property_schema|
            coerce_at(path + [property.to_sym], property_schema)
          end
        else
          coerce_primitive(path, schema[:type], value)
        end
      end

      # Handles arrays whose items are objects: a single non-array value is
      # wrapped into an array first, then every declared property of every
      # item is coerced. Arrays of other item types are left untouched.
      def self.coerce_object_items(path, item_schema, value)
        return unless item_schema && item_schema[:type] == 'object'
        items = value.is_a?(Array) ? value : write_path(path, [value])
        (item_schema[:properties] || {}).each do |property, property_schema|
          items.each_index do |index|
            coerce_at(path + [index, property.to_sym], property_schema)
          end
        end
      end

      # Converts a primitive value (or every element of an array of them)
      # with the converter registered in SCHEMA_MAPPING for +type+.
      def self.coerce_primitive(path, type, value)
        converter = SCHEMA_MAPPING[type.to_s.to_sym]
        return unless converter # unknown or missing type: keep value as is
        if value.is_a?(Array)
          value.each_index { |index| value[index] = value[index].send(*converter) }
        else
          write_path(path, value.send(*converter))
        end
      end

      private_class_method :path_step, :read_path, :write_path, :coerce_at,
                           :coerce_object_items, :coerce_primitive

      # rubocop:enable all

      # Adds the statements for the individual itself and one data property
      # per entry of @values[keyname]. The key 'language' is emitted as
      # "<keyname>Language".
      # @return [Array] the collected statements
      def add_individuals
        add_individual
        return @result unless @values[keyname]
        @values[keyname].each do |k, v|
          s = k.to_s.eql?('language') ? "#{keyname}Language" : k
          add_data_property s, v
        end
        @result
      end

      # Returns the class name without its namespace
      def class_name
        self.class.name.split('::').last
      end

      # Returns the keyname
      # @example:
      #   Readability.new(parent_uri, uri).keyname
      #   => :readability
      def keyname
        class_name.downcase.to_sym
      end

      # @return [String] VOC followed by the parent uri parts joined by '/'
      def resolve_parent_uri_part
        "#{VOC}#{@parent_uri.join('/')}"
      end

      # @return [RDF::URI] the parent uri
      def resolve_parent_uri
        RDF::URI(resolve_parent_uri_part)
      end

      # @param name [Object] identifier, only used when @uri is empty
      # @return [String] "<class_name>/<name>" when @uri is empty, otherwise
      #   the @uri parts joined by '/'
      def resolve_uri_part(name)
        if @uri.empty?
          "#{class_name}/#{name}"
        else
          @uri.join('/')
        end
      end

      # @param name [Object] identifier, defaults to the last @uri part
      # @return [RDF::URI] parent uri and own uri part joined by '/'
      def resolve_uri(name = nil)
        name ||= @uri.last
        RDF::URI(resolve_parent_uri_part + '/' + resolve_uri_part(name))
      end

      # Appends two statements to @result: the individual is of type
      # VOC[klass], and the parent is linked to it via VOC["has<klass>"].
      # @param name [Object] identifier of the individual
      # @param klass [String] class of the individual; defaults to the class
      #   name when @uri is empty, otherwise to the first @uri part
      def add_individual(name = @values[:name], klass = nil)
        klass ||= @uri.empty? ? class_name : @uri.first
        uri = resolve_uri(name)
        @result << Statement.new(uri, RDF.type, VOC[klass])
        @result << Statement.new(resolve_parent_uri, VOC["has#{klass}"], uri)
      end

      # Appends the statement <individual> VOC[type] value to @result.
      def add_data_property(type, value, name = @values[:name])
        @result << Statement.new(resolve_uri(name), VOC[type], value)
      end

      # Appends the statement parent_uri VOC["has<type>"] child_uri to
      # @result; a String parent_uri is converted to an RDF::URI first.
      def add_object_property(parent_uri, type, child_uri)
        parent_uri = RDF::URI(parent_uri) if parent_uri.is_a?(String)
        @result << Statement.new(parent_uri, VOC["has#{type}"], child_uri)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Client
      # Transformation class for a page view of a client visit
      class PageView < Transformation::Base
        # Names the individual after the formatted 'datetime' value, runs the
        # inherited add_individuals and adds one data property per non-String
        # entry of @values. Such entries are expected to respond to +name+
        # and +value+; 'name' is emitted as 'inputname', the 'datetime' value
        # is parsed with Abrupt.parse_time, all other values are CGI escaped.
        # @return [Array] the collected statements (untouched when @values
        #   has no 'datetime')
        def add_individuals
          timestamp = @values['datetime']
          return @result unless timestamp
          @values[:name] = ::Abrupt.format_time(timestamp)
          super
          @values.each do |_key, attribute|
            next if attribute.is_a?(String)
            property = attribute.name
            property = 'inputname' if property.eql?('name')
            converted =
              if property.eql?('datetime')
                Abrupt.parse_time(attribute.value)
              else
                CGI.escape(attribute.value)
              end
            add_data_property(property, converted)
          end
          @result
        end
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Client
      # Transformation class for client visit data
      class Visit < Base
        # Names the individual after the last @uri part, runs the inherited
        # add_individuals, adds a property for every truthy entry of @values
        # and finally the individuals of the page views.
        # @return [Array] the collected statements
        def add_individuals
          @values[:name] = @uri.last
          super
          @values.each do |key, value|
            next unless value
            add_property(key, value)
          end
          add_individuals_for_view
          @result
        end

        # Adds a CGI escaped data property for a scalar value; Hash and Array
        # values are ignored. The key 'uri' additionally links the page to
        # this visit, the key 'size' is emitted as 'contentlength'.
        def add_property(key, value)
          return if value.is_a?(Hash) || value.is_a?(Array) # value.is_a?(Enumerable)
          name =
            case key
            when 'uri'
              link_page_to_visit(value)
              key
            when 'size' # TODO: transform via customize_to_schema
              'contentlength'
            else
              key
            end
          add_data_property(name, CGI.escape(value))
        end

        # Adds one page view per entry of @values[:view]; does nothing when
        # there is no such entry.
        def add_individuals_for_view
          page_views = @values[:view]
          return unless page_views
          page_views.each do |type, view|
            [view].flatten.each do |attributes|
              add_page_view(type.to_s.capitalize, attributes)
            end
          end
        end

        # Builds a PageView below this visit, identified by +type+ and the
        # formatted :datetime of +attributes+, and appends its statements.
        def add_page_view(type, attributes)
          page_view = PageView.new(
            @parent_uri + @uri,
            [type, ::Abrupt.format_time(attributes[:datetime])],
            attributes
          )
          @result += page_view.add_individuals
        end

        private

        # Page hasPageVisit visit: the page uri is built from the second
        # parent uri part and +page_path+, placed below the parent uri
        # without its last two parts.
        def link_page_to_visit(page_path)
          page = [@parent_uri[1], page_path].map(&:remove_last_slashes).join
          segments = @parent_uri[0..-3] + ['Page', page]
          add_object_property("#{VOC}#{segments.join('/')}", 'PageVisit', resolve_uri)
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Client
      # Transformation class for client visitor data
      class Visitor < Transformation::Base
        # Names the individual after @values[:ip], runs the inherited
        # add_individuals and adds a data property for every String value.
        # @return [Array] the collected statements (untouched without @values)
        def add_individuals
          return @result unless @values
          @values[:name] = @values[:ip]
          super
          @values.each do |property, content|
            next unless content.is_a?(String)
            add_data_property(property, content)
          end
          @result
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Website
      # Transformation class for the result of the complexity service
      class Complexity < Base
        # Moves the last two parent uri parts into @uri, flattens the nested
        # vicram complexity into :vicramComplexity and then runs the
        # inherited add_individuals.
        # A result without a :vicram section is passed on unchanged instead
        # of raising a NoMethodError.
        # @return [Array] the collected statements
        def add_individuals
          @uri = @parent_uri.slice!(-2, 2)
          complexity = @values[keyname]
          return @result unless complexity
          # flatten vicram complexity
          vicram = complexity.delete(:vicram)
          complexity[:vicramComplexity] = vicram.delete(:complexity) if vicram
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Website
      # Transformation class for the result of the input service
      # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
      # schema located in {PROJECT_ROOT}/assets/schema/v1/input.json
      class Input < Base
        attr_accessor :form_uri

        # Adds a 'Form' individual identified by the MD5 digest of the input
        # values, appends it to the parent uri and adds the individuals of
        # all inputs below it.
        # @return [Array] the collected statements (untouched when there are
        #   no input values)
        def add_individuals
          form = @values[keyname]
          return @result unless form
          @uri = ['Form', Digest::MD5.hexdigest(form.to_s)]
          add_individual
          @parent_uri += @uri
          form.each do |input_type, inputs|
            add_individuals_for_inputs(inputs, input_type)
          end
          @result
        end

        # Adds an individual plus its data properties for each input. The
        # individual is identified by the :id of the input or, without one,
        # by the MD5 digest of the input; its class is the camelcased type.
        # @param inputs [Hash, Array] a single input or a list of inputs
        # @param input_type [Symbol, String] the type of the inputs
        def add_individuals_for_inputs(inputs, input_type)
          [inputs].flatten.compact.each do |input|
            element_id = input[:id]
            element_id ||= Digest::MD5.hexdigest(input.to_s)
            @uri = [input_type.to_s.camelcase, element_id]
            add_individual
            add_data_properties input
          end
        end

        # Adds a data property for every entry with truthy key and value;
        # String values are HTML escaped first.
        def add_data_properties(input)
          input.each do |type, value|
            next if !type || !value
            value = CGI.escapeHTML(value) if value.is_a?(String)
            add_data_property type, value
          end
        end
      end
    end
  end
end
|