abrupt 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.rubocop.yml +16 -0
- data/.travis.yml +34 -0
- data/Gemfile +4 -0
- data/Guardfile +51 -0
- data/LICENSE.txt +22 -0
- data/README.md +36 -0
- data/Rakefile +7 -0
- data/abrupt.gemspec +41 -0
- data/assets/rules/datatypes/cax-RequiredFormElement.ttl +34 -0
- data/assets/rules/datatypes/cax-readability.ttl +18 -0
- data/assets/rules/datatypes/cax-required.ttl +15 -0
- data/assets/rules/list/prp-hasState.ttl +10 -0
- data/assets/rules/production/non_required_form_element.ttl +24 -0
- data/assets/rules/production/state_has_no_html_element.ttl +21 -0
- data/assets/schema/schema.json +49 -0
- data/assets/schema/v1/complexity.json +142 -0
- data/assets/schema/v1/input.json +1136 -0
- data/assets/schema/v1/link.json +41 -0
- data/assets/schema/v1/picture.json +47 -0
- data/assets/schema/v1/readability.json +51 -0
- data/assets/schema/v1/subject.json +88 -0
- data/assets/voc/tbox.ttl +1632 -0
- data/bin/abrupt +63 -0
- data/doc/paper/listings/datatype_rule.ttl +0 -0
- data/doc/paper/listings/description_logic_infered.ttl +3 -0
- data/doc/paper/listings/description_logic_rule.ttl +15 -0
- data/doc/paper/listings/inconsistency_rule.ttl +0 -0
- data/doc/paper/listings/limitations.ttl +10 -0
- data/doc/paper/listings/production_rule.ttl +0 -0
- data/doc/paper/listings/propositional_logic_infered.ttl +6 -0
- data/doc/paper/listings/propositional_logic_rule.ttl +15 -0
- data/doc/paper/listings/unique_nested_uris.ttl +10 -0
- data/doc/paper/literature.bib +56 -0
- data/doc/paper/main.tex +322 -0
- data/doc/poster/Poster.key +0 -0
- data/doc/poster/Poster.pdf +0 -0
- data/doc/poster/poster.indd +0 -0
- data/doc/poster/resources/graph.graffle +0 -0
- data/doc/poster/resources/graph.png +0 -0
- data/doc/poster/resources/graph_crop.png +0 -0
- data/lib/abrupt.rb +90 -0
- data/lib/abrupt/converter.rb +130 -0
- data/lib/abrupt/crawler.rb +125 -0
- data/lib/abrupt/service/absolute_url.rb +32 -0
- data/lib/abrupt/service/base.rb +75 -0
- data/lib/abrupt/service/complexity.rb +27 -0
- data/lib/abrupt/service/input.rb +15 -0
- data/lib/abrupt/service/link.rb +15 -0
- data/lib/abrupt/service/picture.rb +19 -0
- data/lib/abrupt/service/readability.rb +26 -0
- data/lib/abrupt/service/subject.rb +19 -0
- data/lib/abrupt/transformation/base.rb +145 -0
- data/lib/abrupt/transformation/client/base.rb +8 -0
- data/lib/abrupt/transformation/client/page_view.rb +27 -0
- data/lib/abrupt/transformation/client/visit.rb +56 -0
- data/lib/abrupt/transformation/client/visitor.rb +19 -0
- data/lib/abrupt/transformation/website/base.rb +8 -0
- data/lib/abrupt/transformation/website/complexity.rb +20 -0
- data/lib/abrupt/transformation/website/input.rb +42 -0
- data/lib/abrupt/transformation/website/link.rb +27 -0
- data/lib/abrupt/transformation/website/picture.rb +26 -0
- data/lib/abrupt/transformation/website/readability.rb +15 -0
- data/lib/abrupt/transformation/website/subject.rb +22 -0
- data/lib/abrupt/version.rb +7 -0
- data/spec/cassettes/Abrupt_Crawler/outputs_correct_hash.yml +91250 -0
- data/spec/converter_spec.rb +34 -0
- data/spec/crawler_spec.rb +11 -0
- data/spec/factories/crawled_hashes.rb +468 -0
- data/spec/fixtures/rikscha-mainz.owl +17456 -0
- data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17.xml +51759 -0
- data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17_min.xml +81 -0
- data/spec/fixtures/rikscha_Result.xml +11594 -0
- data/spec/fixtures/rikscha_Result_min.xml +574 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/transformation/base_spec.rb +18 -0
- data/spec/transformation/website/complexity_spec.rb +188 -0
- data/spec/transformation/website/input_spec.rb +181 -0
- data/spec/transformation/website/link_spec.rb +13 -0
- data/spec/transformation/website/picture_spec.rb +20 -0
- data/spec/transformation/website/readability_spec.rb +22 -0
- data/spec/transformation/website/subject_spec.rb +40 -0
- metadata +424 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Complexity service
    # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
    class Complexity < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'

      # @return [Array<String>] option names this service declares
      def self.available_options
        %w[adblock vicram vizweb color contrast ratio]
      end

      # @return [String] the endpoint constant of this service
      def service_uri
        SERVICE_URI
      end

      # Delegates to the parent implementation (which presumably fills
      # @response - confirm in Service::Base) and then reshapes the response:
      # the difference matrix and its palette are flattened in place and the
      # contrast entry stored under '1' is moved to the key '_1'.
      # @return [Hash] the reshaped response
      def execute
        super
        difference_matrix = @response['differenceMatrix']
        difference_matrix['matrix'].flatten!
        difference_matrix['palette'].flatten!
        contrast = @response['contrast']
        contrast['_1'] = contrast.delete('1')
        @response
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Input service
    # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
    class Input < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'

      # @return [String] the endpoint constant of the input service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Link service
    # NOTE(review): the documentation presumably lives at the service URI
    # below; the previous comment pointed at the readability service.
    class Link < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/link/'

      # @return [String] the endpoint constant of the link service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Picture service
    # NOTE(review): the documentation presumably lives at the service URI
    # below; the previous comment pointed at the readability service.
    class Picture < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/picture/'

      # @return [Array<String>] option names this service declares
      def self.available_options
        ['url']
      end

      # @return [String] the endpoint constant of the picture service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Readability service
    # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'
    class Readability < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/readability/'

      # @return [Array<String>] option names this service declares
      def self.available_options
        %w[lang]
      end

      # @return [String] the endpoint constant of the readability service
      def service_uri
        SERVICE_URI
      end

      # Delegates to the parent implementation, then strips the entries
      # 'originalText' and 'hyphenText' from @response.
      # @return [Hash] the trimmed response
      def execute
        super
        unwanted = %w[originalText hyphenText]
        @response.reject! { |key, _value| unwanted.include?(key) }
        @response
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Service
    # Subject service
    # NOTE(review): the documentation presumably lives at the service URI
    # below; the previous comment pointed at the readability service.
    class Subject < Base
      # TODO: outsource service uri to module Service
      SERVICE_URI = 'http://wba.cs.hs-rm.de/AbRUPt/service/subject/'

      # @return [Array<String>] option names this service declares
      def self.available_options
        %w(lang word_limit depth)
      end

      # @return [String] the endpoint constant of the subject service
      def service_uri
        SERVICE_URI
      end
    end
  end
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    # Base class of all transformations. An instance collects RDF statements
    # in @result that describe one individual (addressed by @uri) below a
    # parent individual (addressed by @parent_uri).
    class Base
      include RDF
      attr_accessor :parent_uri, :uri, :values, :result, :md5

      # Maps a schema type to the message (optionally with arguments) that is
      # sent to a raw value to convert it.
      # NOTE(review): `kind_of?(Object)` is true for every object, so any
      # present value of a boolean property becomes `true` - confirm intended.
      SCHEMA_MAPPING = {
        integer: :to_i,
        number: :to_f,
        string: :to_s,
        boolean: [:kind_of?, Object]
      }

      # Initializes Transformer for Individual Statement for parent_uri & uri.
      # @param parent_uri [Array] the parent uri in array structure of paths
      # @param uri [Array] the uri as array structure of path and id
      # @param values [Hash] the data the statements are generated from
      # @example
      #   Readability.new([
      #     'Website',
      #     'http://www.rikscha-mainz.de',
      #     'Page',
      #     'http://www.rikscha-mainz.de/Angebote'
      #   ], [
      #     'State',
      #     'state54'
      #   ])
      def initialize(parent_uri, uri, values = {})
        @parent_uri = parent_uri.to_a.map(&:remove_last_slashes)
        @uri = uri.to_a.map(&:remove_last_slashes)
        @values = values
        @result = []
      end

      # rubocop:disable all

      # Converts the entries below values[:state][<keyname>] to the types
      # declared in assets/schema/v1/<keyname>.json, where <keyname> is the
      # downcased class name. The hash is modified in place.
      # @param values [Hash] hash to customize
      # @return [Hash] the given hash (untouched when no schema file exists)
      def self.customize_to_schema(values)
        @values = values
        keyname = name.split('::').last.downcase.to_sym
        schema_file = File.join Abrupt.root, 'assets', 'schema', 'v1', "#{keyname}.json"
        return values unless File.exist?(schema_file)
        schema = ::JSON.load(File.read(schema_file)).deep_symbolize_keys
        # :button => ..., :text => {:type => "array", :items => {...}}
        schema[:properties][keyname][:properties].each do |state_key, state_schema|
          set_value(state_key, state_schema, [':state', ":#{keyname}"])
        end
        @values
      end

      # Converts the value found in @values at the path `ref + key` according
      # to `schema`, descending into objects and arrays of objects.
      # Nothing happens when the path does not lead to a truthy value.
      # @param key [Symbol, String] key to append to the path
      # @param schema [Hash] schema fragment describing the value
      # @param ref [Array] path so far; strings of the form ':name' address
      #   hash keys, integers address array positions. The key is appended to
      #   this array in place (callers pass a fresh array or a dup).
      def self.set_value(key, schema, ref)
        ref << ":#{key}"
        path = ref.map { |part| path_step(part) }
        value = read_path(path)
        return unless value
        case schema[:type]
        when 'array'
          customize_object_items(value, schema[:items], ref, path)
        when 'object'
          schema[:properties].each do |schema_key, schema_value|
            set_value(schema_key, schema_value, ref.dup)
          end
        else
          customize_primitive(value, schema[:type], path)
        end
      end

      # Turns one element of a ref array into a real hash key or array index.
      def self.path_step(part)
        return part unless part.is_a?(String)
        part.sub(/\A:/, '').to_sym
      end

      # Walks @values along path; returns nil as soon as a step cannot be taken.
      def self.read_path(path)
        path.inject(@values) do |node, step|
          indexable = node.is_a?(Hash) || (node.is_a?(Array) && step.is_a?(Integer))
          break nil unless indexable
          node[step]
        end
      end

      # Stores new_value in @values at path (the parent container must exist).
      def self.write_path(path, new_value)
        container = read_path(path[0...-1])
        container[path.last] = new_value
      end

      # Handles a schema of type 'array': only arrays of objects are
      # customized. A single object is wrapped into an array first.
      def self.customize_object_items(value, items_schema, ref, path)
        return unless items_schema[:type] == 'object'
        # :name => { :type => :string }
        properties = items_schema[:properties]
        return if properties.empty?
        items = value.is_a?(Array) ? value : [value].flatten.compact
        write_path(path, items) unless value.equal?(items)
        properties.each do |item_key, item_schema|
          items.each_index do |i|
            set_value item_key, item_schema, ref.dup + [i]
          end
        end
      end

      # Converts a primitive value (or every element of an array of
      # primitives) with the SCHEMA_MAPPING entry of type. Values of a type
      # without a mapping are left unchanged.
      def self.customize_primitive(value, type, path)
        conversion = SCHEMA_MAPPING[type.to_s.to_sym]
        return unless conversion
        if value.is_a? Array
          value.map! { |element| element.send(*conversion) }
        else
          write_path(path, value.send(*conversion))
        end
      end

      private_class_method :path_step, :read_path, :write_path,
                           :customize_object_items, :customize_primitive

      # rubocop:enable all

      # Adds the individual itself and one data property per entry of
      # @values[keyname]. The key 'language' is emitted as "<keyname>Language".
      # @return [Array] the collected statements
      def add_individuals
        add_individual
        return @result unless @values[keyname]
        @values[keyname].each do |k, v|
          s = k.to_s.eql?('language') ? "#{keyname}Language" : k
          add_data_property s, v
        end
        @result
      end

      # Returns the class name
      def class_name
        self.class.name.split('::').last
      end

      # Returns the keyname
      # @example:
      #   Readability.new(parent_uri, uri).keyname
      #   => :readability
      def keyname
        class_name.downcase.to_sym
      end

      # @return [String] VOC followed by the parent uri segments joined by '/'
      def resolve_parent_uri_part
        "#{VOC}#{@parent_uri.join('/')}"
      end

      # @return [RDF::URI] uri of the parent individual
      def resolve_parent_uri
        RDF::URI(resolve_parent_uri_part)
      end

      # Returns the path part of this individual below its parent.
      # The name is only used when @uri is empty.
      # @param name [String] fallback identifier
      def resolve_uri_part(name)
        if @uri.empty?
          "#{class_name}/#{name}"
        else
          @uri.join('/')
        end
      end

      # @param name [String, nil] identifier, defaults to the last uri segment
      # @return [RDF::URI] uri of this individual
      def resolve_uri(name = nil)
        name ||= @uri.last
        RDF::URI(resolve_parent_uri_part + '/' + resolve_uri_part(name))
      end

      # Adds two statements: the type of the individual and the
      # "has<klass>" relation from the parent to the individual.
      # @param name [String] identifier of the individual
      # @param klass [String, nil] class name, defaults to the first uri
      #   segment or, when @uri is empty, to the transformation class name
      def add_individual(name = @values[:name], klass = nil)
        klass ||= @uri.empty? ? class_name : @uri.first
        uri = resolve_uri(name)
        @result << Statement.new(uri, RDF.type, VOC[klass])
        @result << Statement.new(resolve_parent_uri, VOC["has#{klass}"], uri)
      end

      # Adds the statement <individual> VOC[type] value.
      def add_data_property(type, value, name = @values[:name])
        @result << Statement.new(resolve_uri(name), VOC[type], value)
      end

      # Adds the statement parent_uri VOC["has<type>"] child_uri.
      # A String parent_uri is converted to an RDF::URI first.
      def add_object_property(parent_uri, type, child_uri)
        parent_uri = RDF::URI(parent_uri) if parent_uri.is_a?(String)
        @result << Statement.new(parent_uri, VOC["has#{type}"], child_uri)
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Client
      # Transformation class for a page view of a client visit
      class PageView < Transformation::Base
        # Names the individual after the formatted 'datetime' entry, lets the
        # base class add it and then adds one data property per non-String
        # entry of @values. Those entries must respond to #name and #value
        # (presumably XML attribute nodes - TODO confirm with the caller).
        # An entry called 'name' is emitted as 'inputname'; the 'datetime'
        # entry is parsed, every other value is CGI-escaped.
        # @return [Array] the collected statements (empty without 'datetime')
        def add_individuals
          timestamp = @values['datetime']
          return @result unless timestamp
          @values[:name] = ::Abrupt.format_time(timestamp)
          super
          @values.each do |_key, attribute|
            next if attribute.is_a?(String)
            property = attribute.name.eql?('name') ? 'inputname' : attribute.name
            if property.eql?('datetime')
              add_data_property(property, Abrupt.parse_time(attribute.value))
            else
              add_data_property(property, CGI.escape(attribute.value))
            end
          end
          @result
        end
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Client
      # Transformation class for client visit data
      class Visit < Base
        # Names the individual after the last uri segment, lets the base
        # class add it, then adds the scalar entries of @values as properties
        # and finally the nested page views.
        # @return [Array] the collected statements
        def add_individuals
          @values[:name] = @uri.last
          super
          @values.each { |key, value| add_property(key, value) if value }
          add_individuals_for_view
          @result
        end

        # Adds one data property for a scalar entry; Hash and Array values
        # are ignored. The key 'uri' additionally links the page to this
        # visit, the key 'size' is emitted as 'contentlength'.
        def add_property(key, value)
          return if [Hash, Array].any? { |container| value.is_a?(container) }
          if key == 'uri'
            page_id = [@parent_uri[1], value].map(&:remove_last_slashes).join
            page_path = @parent_uri[0..-3] + ['Page', page_id]
            # Page hasPageVisit visit
            add_object_property("#{VOC}#{page_path.join('/')}", 'PageVisit', resolve_uri)
          end
          # TODO: transform 'size' via customize_to_schema
          property = key == 'size' ? 'contentlength' : key
          add_data_property(property, CGI.escape(value))
        end

        # Adds a page view individual for every entry below @values[:view];
        # a single view and a list of views are both accepted.
        def add_individuals_for_view
          views_by_type = @values[:view]
          return unless views_by_type
          views_by_type.each do |type, view|
            [view].flatten.each do |attributes|
              add_page_view(type.to_s.capitalize, attributes)
            end
          end
        end

        # Builds a PageView below this visit, identified by its type and
        # formatted time, and appends its statements to @result.
        def add_page_view(type, attributes)
          formatted_time = ::Abrupt.format_time(attributes[:datetime])
          view_uri = [type, formatted_time]
          transformer = PageView.new(@parent_uri + @uri, view_uri, attributes)
          @result += transformer.add_individuals
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Client
      # Transformation class for client visitor data
      class Visitor < Transformation::Base
        # Names the individual after its :ip entry, lets the base class add
        # it and adds every String entry of @values as a data property.
        # @return [Array] the collected statements
        def add_individuals
          return @result unless @values
          @values[:name] = @values[:ip]
          super
          @values.each do |key, value|
            next unless value.is_a?(String)
            add_data_property(key, value)
          end
          @result
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Website
      # Transformation of the complexity service result
      # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/complexity/public/index.php/api/v1/complexity'
      class Complexity < Base
        # Moves the last two segments of @parent_uri into @uri, flattens the
        # nested vicram complexity into :vicramComplexity and lets the base
        # class emit the statements.
        # @return [Array] the collected statements
        def add_individuals
          @uri = @parent_uri.slice!(-2, 2)
          complexity = @values[keyname]
          return @result unless complexity
          # flatten vicram complexity; the section is absent when the service
          # ran without the vicram option, so only flatten what is there
          vicram = complexity.delete(:vicram)
          complexity[:vicramComplexity] = vicram.delete(:complexity) if vicram
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# @author Manuel Dudda
|
2
|
+
module Abrupt
  module Transformation
    module Website
      # Input service
      # documentation see 'http://wba.cs.hs-rm.de/AbRUPt/service/input/'
      # schema located in {PROJECT_ROOT}/assets/schema/v1/input.json
      class Input < Base
        attr_accessor :form_uri

        # Adds a Form individual (identified by the MD5 of the input data)
        # and, below it, one individual per form element.
        # @return [Array] the collected statements
        def add_individuals
          form = @values[keyname]
          return @result unless form
          @uri = ['Form', Digest::MD5.hexdigest(form.to_s)]
          add_individual
          # every form element is attached below the form
          @parent_uri += @uri
          form.each do |input_type, inputs|
            add_individuals_for_inputs(inputs, input_type)
          end
          @result
        end

        # Adds an individual with its data properties for each given input.
        # The element is identified by its :id or, without one, by the MD5
        # of its data.
        # @param inputs [Hash, Array, nil] one input or a list of inputs
        # @param input_type [Symbol, String] type the inputs are filed under
        def add_individuals_for_inputs(inputs, input_type)
          [inputs].flatten.compact.each do |input|
            element_id = input[:id]
            element_id ||= Digest::MD5.hexdigest(input.to_s)
            @uri = [input_type.to_s.camelcase, element_id]
            add_individual
            add_data_properties(input)
          end
        end

        # Adds one data property per entry with a truthy key and value;
        # String values are HTML-escaped.
        def add_data_properties(input)
          input.each do |type, value|
            next if !type || !value
            if value.is_a?(String)
              add_data_property(type, CGI.escapeHTML(value))
            else
              add_data_property(type, value)
            end
          end
        end
      end
    end
  end
end
|