abrupt 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84)
  1. checksums.yaml +7 -0
  2. data/.gitignore +22 -0
  3. data/.rubocop.yml +16 -0
  4. data/.travis.yml +34 -0
  5. data/Gemfile +4 -0
  6. data/Guardfile +51 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +36 -0
  9. data/Rakefile +7 -0
  10. data/abrupt.gemspec +41 -0
  11. data/assets/rules/datatypes/cax-RequiredFormElement.ttl +34 -0
  12. data/assets/rules/datatypes/cax-readability.ttl +18 -0
  13. data/assets/rules/datatypes/cax-required.ttl +15 -0
  14. data/assets/rules/list/prp-hasState.ttl +10 -0
  15. data/assets/rules/production/non_required_form_element.ttl +24 -0
  16. data/assets/rules/production/state_has_no_html_element.ttl +21 -0
  17. data/assets/schema/schema.json +49 -0
  18. data/assets/schema/v1/complexity.json +142 -0
  19. data/assets/schema/v1/input.json +1136 -0
  20. data/assets/schema/v1/link.json +41 -0
  21. data/assets/schema/v1/picture.json +47 -0
  22. data/assets/schema/v1/readability.json +51 -0
  23. data/assets/schema/v1/subject.json +88 -0
  24. data/assets/voc/tbox.ttl +1632 -0
  25. data/bin/abrupt +63 -0
  26. data/doc/paper/listings/datatype_rule.ttl +0 -0
  27. data/doc/paper/listings/description_logic_infered.ttl +3 -0
  28. data/doc/paper/listings/description_logic_rule.ttl +15 -0
  29. data/doc/paper/listings/inconsistency_rule.ttl +0 -0
  30. data/doc/paper/listings/limitations.ttl +10 -0
  31. data/doc/paper/listings/production_rule.ttl +0 -0
  32. data/doc/paper/listings/propositional_logic_infered.ttl +6 -0
  33. data/doc/paper/listings/propositional_logic_rule.ttl +15 -0
  34. data/doc/paper/listings/unique_nested_uris.ttl +10 -0
  35. data/doc/paper/literature.bib +56 -0
  36. data/doc/paper/main.tex +322 -0
  37. data/doc/poster/Poster.key +0 -0
  38. data/doc/poster/Poster.pdf +0 -0
  39. data/doc/poster/poster.indd +0 -0
  40. data/doc/poster/resources/graph.graffle +0 -0
  41. data/doc/poster/resources/graph.png +0 -0
  42. data/doc/poster/resources/graph_crop.png +0 -0
  43. data/lib/abrupt.rb +90 -0
  44. data/lib/abrupt/converter.rb +130 -0
  45. data/lib/abrupt/crawler.rb +125 -0
  46. data/lib/abrupt/service/absolute_url.rb +32 -0
  47. data/lib/abrupt/service/base.rb +75 -0
  48. data/lib/abrupt/service/complexity.rb +27 -0
  49. data/lib/abrupt/service/input.rb +15 -0
  50. data/lib/abrupt/service/link.rb +15 -0
  51. data/lib/abrupt/service/picture.rb +19 -0
  52. data/lib/abrupt/service/readability.rb +26 -0
  53. data/lib/abrupt/service/subject.rb +19 -0
  54. data/lib/abrupt/transformation/base.rb +145 -0
  55. data/lib/abrupt/transformation/client/base.rb +8 -0
  56. data/lib/abrupt/transformation/client/page_view.rb +27 -0
  57. data/lib/abrupt/transformation/client/visit.rb +56 -0
  58. data/lib/abrupt/transformation/client/visitor.rb +19 -0
  59. data/lib/abrupt/transformation/website/base.rb +8 -0
  60. data/lib/abrupt/transformation/website/complexity.rb +20 -0
  61. data/lib/abrupt/transformation/website/input.rb +42 -0
  62. data/lib/abrupt/transformation/website/link.rb +27 -0
  63. data/lib/abrupt/transformation/website/picture.rb +26 -0
  64. data/lib/abrupt/transformation/website/readability.rb +15 -0
  65. data/lib/abrupt/transformation/website/subject.rb +22 -0
  66. data/lib/abrupt/version.rb +7 -0
  67. data/spec/cassettes/Abrupt_Crawler/outputs_correct_hash.yml +91250 -0
  68. data/spec/converter_spec.rb +34 -0
  69. data/spec/crawler_spec.rb +11 -0
  70. data/spec/factories/crawled_hashes.rb +468 -0
  71. data/spec/fixtures/rikscha-mainz.owl +17456 -0
  72. data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17.xml +51759 -0
  73. data/spec/fixtures/rikscha.ohneBilder.2013-04-30_2013-08-17_min.xml +81 -0
  74. data/spec/fixtures/rikscha_Result.xml +11594 -0
  75. data/spec/fixtures/rikscha_Result_min.xml +574 -0
  76. data/spec/spec_helper.rb +26 -0
  77. data/spec/transformation/base_spec.rb +18 -0
  78. data/spec/transformation/website/complexity_spec.rb +188 -0
  79. data/spec/transformation/website/input_spec.rb +181 -0
  80. data/spec/transformation/website/link_spec.rb +13 -0
  81. data/spec/transformation/website/picture_spec.rb +20 -0
  82. data/spec/transformation/website/readability_spec.rb +22 -0
  83. data/spec/transformation/website/subject_spec.rb +40 -0
  84. metadata +424 -0
@@ -0,0 +1,34 @@
require 'spec_helper'

# Specs for Abrupt::Converter: loading the crawled website XML fixture into a
# hash, validating that hash against the JSON schema, and (still pending)
# converting a hash into an RDF repository.
describe Abrupt::Converter, :vcr do
  let(:website_data_file) { 'spec/fixtures/rikscha_Result_min.xml' }
  # NOTE(review): the gem's file list only shows
  # spec/fixtures/rikscha-mainz.owl -- confirm this .ttl fixture exists before
  # un-pending the 'converting' example below.
  let(:website_repo_file) { 'spec/fixtures/rikscha-mainz.ttl' }
  # Every example talks to the same converter object; look it up in one place.
  let(:converter) { Abrupt::Converter.instance }

  # Re-initialise the converter before each example.
  before(:each) { converter.init }

  context '#append_website_data' do
    it 'should convert to hash with customized datatypes' do
      crawled_hash = FactoryGirl.attributes_for(:rikscha_website_data)
      crawled_hash = crawled_hash.deep_symbolize_keys[:data]
      converter.append_website_data(website_data_file)
      # eql (not eq) on purpose: the example is about the value datatypes.
      expect(converter.hsh).to eql(crawled_hash)
    end

    it 'should validate against the json schema' do
      schema_filename = 'assets/schema/schema.json'
      converter.append_website_data(website_data_file)
      expect do
        JSON::Validator.validate!(schema_filename, converter.hsh.to_json)
      end.not_to raise_error
    end
  end

  context 'converting' do
    it 'from hash to the correct repo' do
      pending 'dev'
      # NOTE(review): no :rikscha_converted factory is defined in
      # spec/factories/crawled_hashes.rb as shown -- add it (or point this at
      # an existing factory) when finishing this example.
      crawled_hash = FactoryGirl.attributes_for(:rikscha_converted)
      converted_repo = Abrupt::Converter.to_repository(crawled_hash)
      expected_repo = RDF::Repository.load(website_repo_file)
      expect(converted_repo).to be_isomorphic_with(expected_repo)
    end
  end
end
@@ -0,0 +1,11 @@
require 'spec_helper'

# Compares the hash produced by crawling the rikscha-mainz site with the
# expectation stored in a factory.
describe Abrupt::Crawler, :vcr do
  # NOTE(review): the description presumably doubles as the VCR cassette name
  # (spec/cassettes/Abrupt_Crawler/outputs_correct_hash.yml) -- keep it stable.
  it 'outputs correct hash', beta: true do
    pending 'out of development'
    site = 'http://www.rikscha-mainz.de'
    actual = Abrupt::Crawler.new(site, lang: 'de').crawl
    expected = FactoryGirl.attributes_for(:rikscha).deep_symbolize_keys[:data]
    expect(actual).to eq(expected)
  end
end
@@ -0,0 +1,468 @@
# Hash fixture of website data for the rikscha-mainz.de test site.
FactoryGirl.define do
  # Defines a single `data` attribute holding the nested website hash; the
  # converter spec reads it via
  # FactoryGirl.attributes_for(:rikscha_website_data) and takes the :data entry.
  # The hash carries two :url entries: the index page and the /Kontakt/ page.
  factory :rikscha_website_data, class: Hash do
    data do
      {
        website: {
          domain: 'http://www.rikscha-mainz.de',
          url: [
            # --- first page: index ---
            {
              name: 'http://www.rikscha-mainz.de',
              state: {
                name: 'index',
                readability: {
                  readability: 5.288413836478,
                  syllables: 395,
                  words: 225,
                  numberOfLinks: 12,
                  bigwords: 48,
                  sentences: 53,
                  language: 'de'
                },
                input: nil,
                subject: {
                  words: %w(unterbreiten document),
                  subjects: {
                    Liste__Sprache_: '3',
                    Portal_Abk: '3',
                    R: '3',
                    Begriffskl: '3',
                    Portal_Kommunikation_als_Thema: '3',
                    Abk: '3',
                    Liste__Abk: '3',
                    Kofferwort: '3',
                    Zeitliche_Systematik: '3',
                    Liste: '3',
                    Einzelwort: '3'
                  },
                  wordlimit: '20',
                  classLimit: 17,
                  language: 'de',
                  depth: 3
                },
                complexity: {
                  contrast: {
                    # Symbol literal kept on purpose: `_1` is also the name of
                    # Ruby's numbered block parameter.
                    :_1 => {
                      A_tag_With_Low_Contrast: '0',
                      TextNodes_With_Low_Contrast: '0',
                      paragrahps_with_too_long_lines: '0'
                    }
                  },
                  vizweb: {
                    numberOfLeaves: 1,
                    textArea: '0',
                    numberOfTextGroups: 0,
                    visualComplexity: 0.79690559961371,
                    numberOfImageAreas: 0,
                    hue: 0.78842163085938,
                    horizontalSymmetry: '1',
                    horizontalBalance: 'null',
                    colorfulness: 13.723379102371,
                    nontextArea: 1.3333333333333
                  },
                  differenceMatrix: [
                    {
                      # 49 values; each pair of lines below is one group of 7.
                      matrix: [
                        0.0, 37.735924528226, 69.0, 18.0,
                          41.569219381653, 24.657656011876, 12.649110640674,
                        37.735924528226, 0.0, 34.885527085025, 21.260291625469,
                          25.612496949731, 26.832815729997, 33.226495451672,
                        69.0, 34.885527085025, 0.0, 51.0,
                          56.364882684168, 60.934390946328, 66.098411478643,
                        18.0, 21.260291625469, 51.0, 0.0,
                          34.467375879228, 22.360679774998, 18.439088914586,
                        41.569219381653, 25.612496949731, 56.364882684168, 34.467375879228,
                          0.0, 20.396078054371, 33.466401061363,
                        24.657656011876, 26.832815729997, 60.934390946328, 22.360679774998,
                          20.396078054371, 0.0, 21.908902300207,
                        12.649110640674, 33.226495451672, 66.098411478643, 18.439088914586,
                          33.466401061363, 21.908902300207, 0.0
                      ],
                      # 21 values, written three per line.
                      palette: [
                        4.0, 4.0, 4.0,
                        12.0, 12.0, 40.0,
                        4.0, 4.0, 73.0,
                        4.0, 4.0, 22.0,
                        28.0, 28.0, 28.0,
                        12.0, 24.0, 16.0,
                        16.0, 4.0, 8.0
                      ]
                    }
                  ],
                  ratios: {
                    pageSize: {
                      pageSize_in_pixel_squared: 289600,
                      pageWidth_in_pixel: 400,
                      pageHeight_in_pixel: 724
                    },
                    img_Tag: {
                      img_Area_in_pixel_squared: 2730,
                      img_document_tag_percentage: 0.94267955801105,
                      img_document_tag_ratio: 106.08058608059
                    }
                  },
                  vicramComplexity: 0.6395
                },
                link: {
                  a: [
                    { href: 'http://www.rikscha-mainz.de' },
                    { plaintext: 'sprechen sie uns an', href: '/Kontakt/' }
                  ]
                },
                picture: {
                  duration: 3.9,
                  images: [
                    { mimetype: 'image/JPEG',
                      duration: 3.9,
                      filename: 'random.jpg',
                      type: 'normal',
                      geometry: '285x379' },
                    { mimetype: 'image/PNG',
                      duration: 0.0,
                      filename: 'RikschaLogo.png',
                      type: 'article',
                      geometry: '135x111' }
                  ]
                }
              }
            },
            # --- second page: contact form ---
            {
              name: 'http://www.rikscha-mainz.de/Kontakt/',
              state: {
                name: 'state5',
                readability: {
                  readability: 5.1171449882608,
                  syllables: 399,
                  words: 226,
                  numberOfLinks: 13,
                  bigwords: 46,
                  sentences: 49,
                  language: 'de'
                },
                input: {
                  text: [
                    { id: 'termin_von', maxlength: 10, name: 'von', value: nil,
                      class: 'text', required: nil, type: 'text', size: 10 },
                    { id: 'termin_bis', maxlength: 10, name: 'bis', value: nil,
                      class: 'text', type: 'text', size: 10 },
                    { id: 'termin_zeit', maxlength: 5, name: 'zeit', value: nil,
                      class: 'textdisabled', type: 'text', disabled: nil,
                      size: 5 },
                    { id: 'name', maxlength: 60,
                      placeholder: 'Vorname Nachname', name: 'name', value: nil,
                      class: 'text', required: nil, type: 'text', size: 40 },
                    { maxlength: 60, placeholder: 'Straße Hausnummer',
                      name: 'strasse', value: nil, class: 'text', type: 'text',
                      size: 40 },
                    { maxlength: 60, placeholder: 'PLZ Ort', name: 'ort',
                      value: nil, class: 'text', type: 'text', size: 40 },
                    { maxlength: 60, name: 'telefon', value: nil,
                      class: 'text', type: 'text', size: 40 },
                    { id: 'lieferungOrt', placeholder: 'Lieferort',
                      name: 'lieferungOrt', value: nil, type: 'text',
                      size: 20 },
                    { maxlength: 60, name: 'aufmerksam_sonstiges', value: nil,
                      class: 'text', type: 'text', size: 40 }
                  ],
                  submit: [
                    { name: 'action', value: 'Anfrage senden',
                      onclick: 'return(senden());', class: 'bold',
                      type: 'submit' }
                  ],
                  email: [
                    { id: 'email', maxlength: 60, name: 'email', value: nil,
                      class: 'text', required: nil, type: 'email', size: 40 }
                  ],
                  textarea: [
                    { cols: 60, name: 'bemerkung', rows: 5 }
                  ],
                  checkbox: [
                    { id: 'blumenSeide', name: 'blumen', value: 'Seidenrosen',
                      type: 'checkbox' },
                    { id: 'herz', name: 'herz', value: 'Herz',
                      type: 'checkbox' },
                    { id: 'aufmerksam_internet', name: 'aufmerksam_internet',
                      value: 'Internet', type: 'checkbox' },
                    { id: 'aufmerksam_messe', name: 'aufmerksam_messe',
                      value: 'Messe', type: 'checkbox' },
                    { id: 'aufmerksam_zeitschrift',
                      name: 'aufmerksam_zeitschrift', value: 'Zeitschrift',
                      type: 'checkbox' },
                    { id: 'aufmerksam_empfehlung',
                      name: 'aufmerksam_empfehlung', value: 'Empfehlung',
                      type: 'checkbox' },
                    { id: 'agb', name: 'agb', value: 'ja', required: nil,
                      type: 'checkbox' }
                  ],
                  radio: [
                    { id: 'angebot_3tage', name: 'angebot', value: '3 Tage',
                      onclick: "angebotGewaehlt('3Tage');", required: nil,
                      type: 'radio' },
                    { id: 'angebot_hz_fahrer', name: 'angebot',
                      value: 'HZ-Fahrer',
                      onclick: "angebotGewaehlt('HZ-Fahrer');", required: nil,
                      type: 'radio' },
                    { id: 'angebot_2tage', name: 'angebot', value: '2 Tage',
                      onclick: "angebotGewaehlt('2Tage');", required: nil,
                      type: 'radio' },
                    { id: 'angebot_rikschafahrt', name: 'angebot',
                      value: 'Rikschafahrt',
                      onclick: "angebotGewaehlt('Rikschafahrt');",
                      required: nil, type: 'radio' },
                    { id: 'angebot_1tag', name: 'angebot', value: '1 Tag',
                      onclick: "angebotGewaehlt('1Tag');", required: nil,
                      type: 'radio' },
                    { id: 'angebot_mondscheinfahrt', name: 'angebot',
                      value: 'Mondscheinfahrt',
                      onclick: "angebotGewaehlt('Mondscheinfahrt');",
                      required: nil, type: 'radio' },
                    { id: 'angebot_sonstiges', name: 'angebot',
                      value: 'sonstiges',
                      onclick: "angebotGewaehlt('sonstiges');", required: nil,
                      type: 'radio' },
                    { id: 'angebot_picknicktour', name: 'angebot',
                      value: 'Picknicktour',
                      onclick: "angebotGewaehlt('Picknicktour');",
                      required: nil, type: 'radio' },
                    { id: 'angebot_sonstigesFahrer', name: 'angebot',
                      value: 'sonstiges Fahrer',
                      onclick: "angebotGewaehlt('sonstigesFahrer');",
                      required: nil, type: 'radio' },
                    { id: 'lieferungBringen', name: 'lieferung',
                      value: 'Bringen', type: 'radio', checked: nil },
                    { id: 'lieferungAbholung', name: 'lieferung',
                      value: 'Abholung', type: 'radio' },
                    { id: 'lieferungAnhaenger', name: 'lieferung',
                      value: 'Anhänger', type: 'radio' }
                  ]
                },
                subject: {
                  words: %w(input google),
                  subjects: {
                    Liste__Sprache_: '3',
                    Portal_Abk: '3',
                    R: '3',
                    Begriffskl: '3',
                    Portal_Kommunikation_als_Thema: '3',
                    Abk: '3',
                    Liste__Abk: '3',
                    Kofferwort: '3',
                    Zeitliche_Systematik: '3',
                    Liste: '3',
                    Einzelwort: '3'
                  },
                  wordlimit: '20',
                  classLimit: 18,
                  language: 'de',
                  depth: 3
                },
                complexity: {
                  contrast: {
                    # Symbol literal kept on purpose: `_1` is also the name of
                    # Ruby's numbered block parameter.
                    :_1 => {
                      A_tag_With_Low_Contrast: '0',
                      TextNodes_With_Low_Contrast: '0',
                      paragrahps_with_too_long_lines: '0'
                    }
                  },
                  vizweb: {
                    numberOfLeaves: 17,
                    textArea: '0.0074666341145833',
                    numberOfTextGroups: 1,
                    visualComplexity: 0.98656564934711,
                    numberOfImageAreas: 1,
                    hue: 0.92692057291667,
                    horizontalSymmetry: '1',
                    horizontalBalance: 'null',
                    colorfulness: 13.811903651406,
                    nontextArea: 0.0044080946180556
                  },
                  differenceMatrix: [
                    {
                      # 49 values; each pair of lines below is one group of 7.
                      matrix: [
                        0.0, 347.57157536254, 83.138438763306, 155.335121592,
                          193.98969044771, 207.84609690827, 67.0,
                        347.57157536254, 0.0, 264.43524727237, 192.26284092356,
                          153.5903642811, 139.73546436034, 314.05254337451,
                        83.138438763306, 264.43524727237, 0.0, 72.228803118977,
                          110.85125168441, 124.70765814496, 70.491134194308,
                        155.335121592, 192.26284092356, 72.228803118977, 0.0,
                          38.794329482542, 52.621288467691, 129.71507237018,
                        193.98969044771, 153.5903642811, 110.85125168441, 38.794329482542,
                          0.0, 13.856406460551, 164.66025628548,
                        207.84609690827, 139.73546436034, 124.70765814496, 52.621288467691,
                          13.856406460551, 0.0, 177.78920102188,
                        67.0, 314.05254337451, 70.491134194308, 129.71507237018,
                          164.66025628548, 177.78920102188, 0.0
                      ],
                      # 21 values, written three per line.
                      palette: [
                        4.0, 4.0, 4.0,
                        206.0, 205.0, 203.0,
                        52.0, 52.0, 52.0,
                        96.0, 93.0, 92.0,
                        116.0, 116.0, 116.0,
                        124.0, 124.0, 124.0,
                        4.0, 4.0, 71.0
                      ]
                    }
                  ],
                  ratios: {
                    pageSize: {
                      pageSize_in_pixel_squared: 1164339,
                      pageWidth_in_pixel: 619,
                      pageHeight_in_pixel: 1881
                    },
                    img_Tag: {
                      img_Area_in_pixel_squared: 3612,
                      img_document_tag_percentage: 0.3102189310845,
                      img_document_tag_ratio: 322.35299003322
                    }
                  },
                  vicramComplexity: 1.3497
                },
                link: {
                  form: [{ formAction: '/Gesendet/' }],
                  a: [
                    { plaintext: 'startseite', href: '/' },
                    { plaintext: 'allgemeinen geschaef',
                      href: '/agb-rikscha.pdf' }
                  ]
                },
                picture: { duration: 0.0 }
              }
            }
          ]
        }
      }
    end
  end
end