relaton-w3c 1.11.6 → 1.12.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +6 -3
  3. data/grammars/biblio.rng +172 -48
  4. data/grammars/isodoc.rng +77 -0
  5. data/lib/relaton_w3c/data_fetcher.rb +101 -33
  6. data/lib/relaton_w3c/data_index.rb +29 -14
  7. data/lib/relaton_w3c/data_parser.rb +47 -22
  8. data/lib/relaton_w3c/processor.rb +3 -3
  9. data/lib/relaton_w3c/version.rb +1 -1
  10. data/lib/relaton_w3c/workgroups.yaml +285 -148
  11. data/relaton_w3c.gemspec +1 -1
  12. metadata +4 -29
  13. data/data/reference.W3C.CR-rdf-schema.xml +0 -16
  14. data/data/reference.W3C.DSig-label.xml +0 -32
  15. data/data/reference.W3C.P3P-rdfschema.xml +0 -26
  16. data/data/reference.W3C.P3P.xml +0 -38
  17. data/data/reference.W3C.PICS-labels.xml +0 -43
  18. data/data/reference.W3C.PICS-rules.xml +0 -38
  19. data/data/reference.W3C.PICS-services.xml +0 -37
  20. data/data/reference.W3C.REC-RUBY.xml +0 -22
  21. data/data/reference.W3C.REC-XHTML.xml +0 -12
  22. data/data/reference.W3C.REC-rdf-syntax.xml +0 -31
  23. data/data/reference.W3C.REC-xml-1998.xml +0 -20
  24. data/data/reference.W3C.REC-xml-names.xml +0 -28
  25. data/data/reference.W3C.REC-xml.xml +0 -35
  26. data/data/reference.W3C.REC-xmlenc-core.xml +0 -23
  27. data/data/reference.W3C.REC-xmlschema-1.xml +0 -23
  28. data/data/reference.W3C.REC-xmlschema-2.xml +0 -17
  29. data/data/reference.W3C.daml-oil-reference.xml +0 -39
  30. data/data/reference.W3C.soap11.xml +0 -56
  31. data/data/reference.W3C.soap12-part1.xml +0 -38
  32. data/data/reference.W3C.soap12-part2.xml +0 -38
  33. data/data/reference.W3C.xkms.xml +0 -50
  34. data/data/reference.W3C.xml-c14n.xml +0 -15
  35. data/data/reference.W3C.xmldsig-core.xml +0 -26
  36. data/data/reference.W3C.xmlenc-core.xml +0 -20
  37. data/data/reference.W3C.xpath.xml +0 -22
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: faf78f2abf348e9aa6dd74f2f0fe94899d9d6fc34af3368849a818b6dd856e26
4
- data.tar.gz: c4cabbd9559b9f122f34fbb204f7780606bfc87c9db999492fbdd2b6b02639b3
3
+ metadata.gz: 0bf7e3fdd349e94b272db7045c88bae46fe32f642436527486ff4db8abd08518
4
+ data.tar.gz: d66405c0f27950d42f1ceac65d3be95a7f93e19ff5045d4c4618ddb551d819b1
5
5
  SHA512:
6
- metadata.gz: cc6e6c80914b8973091795b1f234ea57a49499bb0466ae04a14358c30e526c38a6f607031e9dd0d4963c2caccca26dd9418939c871b4f47016aa094156d783a3
7
- data.tar.gz: 041625ce66ae2949112bc20856de89b05fd4772c1dd1f3413a3a5b04059ec7cb84f8b92517a3634294080031760fca8795b7ae113bbfae03986f775c5f52acb0
6
+ metadata.gz: fb14cc520b1fbe3f5bc0dfd387b990b1693e5e7ab8c15d093372cf8ebe5f11ae4df521c5d05885da83ef60b3fe475dedaeb3dc764a9aaad22f9288de50307ede
7
+ data.tar.gz: 5d36eea643697240c15bc24abcc6b03414731fd1aed4dc48f4e86f32c7feba6cfa99e3e6f1503882b241414875534a480f4e3c215a39b9bccb60bf0d3c798df5
data/README.adoc CHANGED
@@ -151,14 +151,17 @@ RelatonW3c::W3cBibliographicItem.new **bib_hash
151
151
 
152
152
  === Fetch data
153
153
 
154
- There is a W3C dataset http://www.w3.org/2002/01/tr-automation/tr.rdf which can be converted into RelatonXML/BibXML/BibYAML formats. The static files from the `path_to_gem/withdrawn` directory are added to output during fetching data.
155
-
156
- The method `RelatonW3c::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the dataset and save them to the `./data` folder in YAML format.
154
+ The method `RelatonW3c::DataFetcher.fetch(source, output: "data", format: "yaml")` converts all the documents from the dataset and saves them to the `./data` folder in YAML format.
157
155
  Arguments:
158
156
 
157
+ - `source` - name of dataset (`w3c-rdf` or `w3c-tr-archive`)
159
158
  - `output` - folder to save documents (default './data').
160
159
  - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
161
160
 
161
+ The available datasets are:
162
+ - `w3c-rdf` - The dataset is fetched from http://www.w3.org/2002/01/tr-automation/tr.rdf.
163
+ - `w3c-tr-archive` - The archive dataset files should be downloaded from the https://github.com/relaton/w3c-tr-archive repository and placed into the `w3c-tr-archive` folder.
164
+
162
165
  [source,ruby]
163
166
  ----
164
167
  RelatonW3c::DataFetcher.fetch
data/grammars/biblio.rng CHANGED
@@ -527,7 +527,7 @@
527
527
  </define>
528
528
  <define name="LocalityType">
529
529
  <data type="string">
530
- <param name="pattern">section|clause|part|paragraph|chapter|page|whole|table|annex|figure|note|list|example|volume|issue|time|anchor|locality:[a-zA-Z0-9_]+</param>
530
+ <param name="pattern">section|clause|part|paragraph|chapter|page|title|line|whole|table|annex|figure|note|list|example|volume|issue|time|anchor|locality:[a-zA-Z0-9_]+</param>
531
531
  </data>
532
532
  </define>
533
533
  <define name="referenceFrom">
@@ -614,12 +614,103 @@
614
614
  <optional>
615
615
  <ref name="fetched"/>
616
616
  </optional>
617
- <choice>
618
- <oneOrMore>
619
- <ref name="btitle"/>
620
- </oneOrMore>
617
+ <optional>
621
618
  <ref name="formattedref"/>
622
- </choice>
619
+ </optional>
620
+ <oneOrMore>
621
+ <ref name="btitle"/>
622
+ </oneOrMore>
623
+ <zeroOrMore>
624
+ <ref name="bsource"/>
625
+ </zeroOrMore>
626
+ <oneOrMore>
627
+ <ref name="docidentifier"/>
628
+ </oneOrMore>
629
+ <optional>
630
+ <ref name="docnumber"/>
631
+ </optional>
632
+ <zeroOrMore>
633
+ <ref name="bdate"/>
634
+ </zeroOrMore>
635
+ <zeroOrMore>
636
+ <ref name="contributor"/>
637
+ </zeroOrMore>
638
+ <optional>
639
+ <ref name="edition"/>
640
+ </optional>
641
+ <zeroOrMore>
642
+ <ref name="version"/>
643
+ </zeroOrMore>
644
+ <zeroOrMore>
645
+ <ref name="biblionote"/>
646
+ </zeroOrMore>
647
+ <zeroOrMore>
648
+ <ref name="language"/>
649
+ </zeroOrMore>
650
+ <zeroOrMore>
651
+ <ref name="script"/>
652
+ </zeroOrMore>
653
+ <zeroOrMore>
654
+ <ref name="bibabstract"/>
655
+ </zeroOrMore>
656
+ <optional>
657
+ <ref name="status"/>
658
+ </optional>
659
+ <zeroOrMore>
660
+ <ref name="copyright"/>
661
+ </zeroOrMore>
662
+ <zeroOrMore>
663
+ <ref name="docrelation"/>
664
+ </zeroOrMore>
665
+ <zeroOrMore>
666
+ <ref name="series"/>
667
+ </zeroOrMore>
668
+ <optional>
669
+ <ref name="medium"/>
670
+ </optional>
671
+ <zeroOrMore>
672
+ <ref name="bplace"/>
673
+ </zeroOrMore>
674
+ <zeroOrMore>
675
+ <ref name="bprice"/>
676
+ </zeroOrMore>
677
+ <zeroOrMore>
678
+ <ref name="extent"/>
679
+ </zeroOrMore>
680
+ <optional>
681
+ <ref name="bibliographic_size"/>
682
+ </optional>
683
+ <zeroOrMore>
684
+ <ref name="accesslocation"/>
685
+ </zeroOrMore>
686
+ <zeroOrMore>
687
+ <ref name="license"/>
688
+ </zeroOrMore>
689
+ <zeroOrMore>
690
+ <ref name="bclassification"/>
691
+ </zeroOrMore>
692
+ <zeroOrMore>
693
+ <ref name="bkeyword"/>
694
+ </zeroOrMore>
695
+ <optional>
696
+ <ref name="validity"/>
697
+ </optional>
698
+ </define>
699
+ <define name="ReducedBibliographicItem">
700
+ <optional>
701
+ <attribute name="type">
702
+ <ref name="BibItemType"/>
703
+ </attribute>
704
+ </optional>
705
+ <optional>
706
+ <ref name="fetched"/>
707
+ </optional>
708
+ <optional>
709
+ <ref name="formattedref"/>
710
+ </optional>
711
+ <zeroOrMore>
712
+ <ref name="btitle"/>
713
+ </zeroOrMore>
623
714
  <zeroOrMore>
624
715
  <ref name="bsource"/>
625
716
  </zeroOrMore>
@@ -638,9 +729,9 @@
638
729
  <optional>
639
730
  <ref name="edition"/>
640
731
  </optional>
641
- <optional>
732
+ <zeroOrMore>
642
733
  <ref name="version"/>
643
- </optional>
734
+ </zeroOrMore>
644
735
  <zeroOrMore>
645
736
  <ref name="biblionote"/>
646
737
  </zeroOrMore>
@@ -833,6 +924,12 @@
833
924
  <data type="boolean"/>
834
925
  </attribute>
835
926
  </optional>
927
+ <optional>
928
+ <attribute name="language"/>
929
+ </optional>
930
+ <optional>
931
+ <attribute name="script"/>
932
+ </optional>
836
933
  <text/>
837
934
  </element>
838
935
  </define>
@@ -851,17 +948,46 @@
851
948
  </define>
852
949
  <define name="bplace">
853
950
  <element name="place">
854
- <optional>
855
- <attribute name="uri">
856
- <data type="anyURI"/>
857
- </attribute>
858
- </optional>
859
- <optional>
860
- <attribute name="region"/>
861
- </optional>
951
+ <choice>
952
+ <text/>
953
+ <group>
954
+ <ref name="bibliocity"/>
955
+ <zeroOrMore>
956
+ <ref name="biblioregion"/>
957
+ </zeroOrMore>
958
+ <zeroOrMore>
959
+ <ref name="bibliocountry"/>
960
+ </zeroOrMore>
961
+ </group>
962
+ </choice>
963
+ </element>
964
+ </define>
965
+ <define name="bibliocity">
966
+ <element name="city">
862
967
  <text/>
863
968
  </element>
864
969
  </define>
970
+ <define name="biblioregion">
971
+ <element name="region">
972
+ <ref name="RegionType"/>
973
+ </element>
974
+ </define>
975
+ <define name="bibliocountry">
976
+ <element name="country">
977
+ <ref name="RegionType"/>
978
+ </element>
979
+ </define>
980
+ <define name="RegionType">
981
+ <optional>
982
+ <attribute name="iso"/>
983
+ </optional>
984
+ <optional>
985
+ <attribute name="recommended">
986
+ <data type="boolean"/>
987
+ </attribute>
988
+ </optional>
989
+ <text/>
990
+ </define>
865
991
  <define name="bprice">
866
992
  <element name="price">
867
993
  <attribute name="currency"/>
@@ -957,36 +1083,34 @@
957
1083
  <ref name="SeriesType"/>
958
1084
  </attribute>
959
1085
  </optional>
960
- <choice>
1086
+ <optional>
961
1087
  <ref name="formattedref"/>
962
- <group>
963
- <ref name="btitle"/>
964
- <optional>
965
- <ref name="bplace"/>
966
- </optional>
967
- <optional>
968
- <ref name="seriesorganization"/>
969
- </optional>
970
- <optional>
971
- <ref name="abbreviation"/>
972
- </optional>
973
- <optional>
974
- <ref name="seriesfrom"/>
975
- </optional>
976
- <optional>
977
- <ref name="seriesto"/>
978
- </optional>
979
- <optional>
980
- <ref name="seriesnumber"/>
981
- </optional>
982
- <optional>
983
- <ref name="seriespartnumber"/>
984
- </optional>
985
- <optional>
986
- <ref name="seriesrun"/>
987
- </optional>
988
- </group>
989
- </choice>
1088
+ </optional>
1089
+ <ref name="btitle"/>
1090
+ <optional>
1091
+ <ref name="bplace"/>
1092
+ </optional>
1093
+ <optional>
1094
+ <ref name="seriesorganization"/>
1095
+ </optional>
1096
+ <optional>
1097
+ <ref name="abbreviation"/>
1098
+ </optional>
1099
+ <optional>
1100
+ <ref name="seriesfrom"/>
1101
+ </optional>
1102
+ <optional>
1103
+ <ref name="seriesto"/>
1104
+ </optional>
1105
+ <optional>
1106
+ <ref name="seriesnumber"/>
1107
+ </optional>
1108
+ <optional>
1109
+ <ref name="seriespartnumber"/>
1110
+ </optional>
1111
+ <optional>
1112
+ <ref name="seriesrun"/>
1113
+ </optional>
990
1114
  </element>
991
1115
  </define>
992
1116
  <define name="SeriesType">
@@ -1145,7 +1269,7 @@
1145
1269
  </element>
1146
1270
  </optional>
1147
1271
  <element name="bibitem">
1148
- <ref name="BibliographicItem"/>
1272
+ <ref name="ReducedBibliographicItem"/>
1149
1273
  </element>
1150
1274
  <choice>
1151
1275
  <zeroOrMore>
@@ -1170,9 +1294,9 @@
1170
1294
  <optional>
1171
1295
  <ref name="revision-date"/>
1172
1296
  </optional>
1173
- <zeroOrMore>
1297
+ <optional>
1174
1298
  <ref name="draft"/>
1175
- </zeroOrMore>
1299
+ </optional>
1176
1300
  </element>
1177
1301
  </define>
1178
1302
  <define name="vedition">
data/grammars/isodoc.rng CHANGED
@@ -32,6 +32,43 @@
32
32
  <ref name="DocumentType"/>
33
33
  </element>
34
34
  </define>
35
+ <define name="admonition">
36
+ <element name="admonition">
37
+ <attribute name="type">
38
+ <ref name="AdmonitionType"/>
39
+ </attribute>
40
+ <optional>
41
+ <attribute name="class"/>
42
+ </optional>
43
+ <attribute name="id">
44
+ <data type="ID"/>
45
+ </attribute>
46
+ <optional>
47
+ <attribute name="uri">
48
+ <data type="anyURI"/>
49
+ </attribute>
50
+ </optional>
51
+ <optional>
52
+ <attribute name="coverpage">
53
+ <data type="boolean"/>
54
+ </attribute>
55
+ </optional>
56
+ <optional>
57
+ <attribute name="notag">
58
+ <data type="boolean"/>
59
+ </attribute>
60
+ </optional>
61
+ <optional>
62
+ <ref name="tname"/>
63
+ </optional>
64
+ <zeroOrMore>
65
+ <ref name="paragraph-with-footnote"/>
66
+ </zeroOrMore>
67
+ <zeroOrMore>
68
+ <ref name="note"/>
69
+ </zeroOrMore>
70
+ </element>
71
+ </define>
35
72
  <define name="index">
36
73
  <element name="index">
37
74
  <optional>
@@ -79,6 +116,11 @@
79
116
  <data type="boolean"/>
80
117
  </attribute>
81
118
  </optional>
119
+ <optional>
120
+ <attribute name="suppress_identifier">
121
+ <data type="boolean"/>
122
+ </attribute>
123
+ </optional>
82
124
  <ref name="BibliographicItem"/>
83
125
  </element>
84
126
  </define>
@@ -245,6 +287,9 @@
245
287
  <ref name="MultilingualRenderingType"/>
246
288
  </attribute>
247
289
  </optional>
290
+ <optional>
291
+ <ref name="tname"/>
292
+ </optional>
248
293
  <oneOrMore>
249
294
  <ref name="ul_li"/>
250
295
  </oneOrMore>
@@ -287,6 +332,9 @@
287
332
  </choice>
288
333
  </attribute>
289
334
  </optional>
335
+ <optional>
336
+ <ref name="tname"/>
337
+ </optional>
290
338
  <oneOrMore>
291
339
  <ref name="li"/>
292
340
  </oneOrMore>
@@ -323,6 +371,9 @@
323
371
  <ref name="MultilingualRenderingType"/>
324
372
  </attribute>
325
373
  </optional>
374
+ <optional>
375
+ <ref name="tname"/>
376
+ </optional>
326
377
  <oneOrMore>
327
378
  <ref name="dt"/>
328
379
  <ref name="dd"/>
@@ -657,6 +708,9 @@
657
708
  <optional>
658
709
  <attribute name="tag"/>
659
710
  </optional>
711
+ <optional>
712
+ <attribute name="type"/>
713
+ </optional>
660
714
  <optional>
661
715
  <attribute name="multilingual-rendering">
662
716
  <ref name="MultilingualRenderingType"/>
@@ -692,6 +746,9 @@
692
746
  <optional>
693
747
  <attribute name="tag"/>
694
748
  </optional>
749
+ <optional>
750
+ <attribute name="type"/>
751
+ </optional>
695
752
  <optional>
696
753
  <attribute name="multilingual-rendering">
697
754
  <ref name="MultilingualRenderingType"/>
@@ -851,6 +908,16 @@
851
908
  <ref name="MultilingualRenderingType"/>
852
909
  </attribute>
853
910
  </optional>
911
+ <optional>
912
+ <attribute name="coverpage">
913
+ <data type="boolean"/>
914
+ </attribute>
915
+ </optional>
916
+ <optional>
917
+ <attribute name="notag">
918
+ <data type="boolean"/>
919
+ </attribute>
920
+ </optional>
854
921
  <oneOrMore>
855
922
  <choice>
856
923
  <ref name="paragraph"/>
@@ -1178,6 +1245,7 @@
1178
1245
  <ref name="concept"/>
1179
1246
  <ref name="add"/>
1180
1247
  <ref name="del"/>
1248
+ <ref name="span"/>
1181
1249
  </choice>
1182
1250
  </define>
1183
1251
  <define name="add">
@@ -1204,6 +1272,14 @@
1204
1272
  </choice>
1205
1273
  </element>
1206
1274
  </define>
1275
+ <define name="span">
1276
+ <element name="span">
1277
+ <attribute name="class"/>
1278
+ <oneOrMore>
1279
+ <ref name="TextElement"/>
1280
+ </oneOrMore>
1281
+ </element>
1282
+ </define>
1207
1283
  <define name="concept">
1208
1284
  <element name="concept">
1209
1285
  <optional>
@@ -1994,6 +2070,7 @@
1994
2070
  <value>compare</value>
1995
2071
  <value>contrast</value>
1996
2072
  <value>see</value>
2073
+ <value>seealso</value>
1997
2074
  </choice>
1998
2075
  </define>
1999
2076
  <define name="deprecates">
@@ -20,22 +20,21 @@ module RelatonW3c
20
20
  @ext = format.sub(/^bib/, "")
21
21
  dir = File.dirname(File.expand_path(__FILE__))
22
22
  @group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
23
- @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
24
- @files = []
25
23
  @index = DataIndex.new
26
24
  end
27
25
 
28
26
  #
29
27
  # Initialize fetcher and run fetch
30
28
  #
29
+ # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
31
30
  # @param [String] output directory to save files, default: "data"
32
31
  # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
33
32
  #
34
- def self.fetch(output: "data", format: "yaml")
33
+ def self.fetch(source, output: "data", format: "yaml")
35
34
  t1 = Time.now
36
35
  puts "Started at: #{t1}"
37
- FileUtils.mkdir_p output unless Dir.exist? output
38
- new(output, format).fetch
36
+ FileUtils.mkdir_p output
37
+ new(output, format).fetch source
39
38
  t2 = Time.now
40
39
  puts "Stopped at: #{t2}"
41
40
  puts "Done in: #{(t2 - t1).round} sec."
@@ -44,47 +43,113 @@ module RelatonW3c
44
43
  #
45
44
  # Parse documents
46
45
  #
47
- def fetch # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
48
- query_versioned_docs.each do |sl|
49
- save_doc DataParser.parse(sl, self)
50
- rescue StandardError => e
51
- warn "Error: document #{sl.link} #{e.message}"
52
- warn e.backtrace.join("\n")
46
+ # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
47
+ #
48
+ def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
49
+ each_dataset(source) do |rdf|
50
+ %i[versioned unversioned].each do |type|
51
+ send("query_#{type}_docs", rdf).each do |sl|
52
+ bib = DataParser.parse(rdf, sl, self)
53
+ add_has_edition_relation(bib) if type == :unversioned
54
+ save_doc bib
55
+ rescue StandardError => e
56
+ link = sl.respond_to?(:link) ? sl.link : sl.version_of
57
+ warn "Error: document #{link} #{e.message}"
58
+ warn e.backtrace.join("\n")
59
+ end
60
+ end
53
61
  end
54
- query_unversioned_docs.each do |sl|
55
- save_doc DataParser.parse(sl, self)
56
- rescue StandardError => e
57
- warn "Error: document #{sl.version_of} #{e.message}"
58
- warn e.backtrace.join("\n")
62
+ @index.sort!.save
63
+ end
64
+
65
+ #
66
+ # Add hasEdition relations from the previously parsed document
67
+ #
68
+ # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
69
+ #
70
+ def add_has_edition_relation(bib) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
71
+ file = file_name bib.docnumber
72
+ return unless File.exist? file
73
+
74
+ b = case @format
75
+ when "xml" then XMLParser.from_xml(File.read(file, encoding: "UTF-8"))
76
+ when "yaml"
77
+ hash = YAML.load_file(file)
78
+ W3cBibliographicItem.from_hash(hash)
79
+ when "bibxml" then BibXMLParser.parse File.read(file, encoding: "UTF-8")
80
+ end
81
+ b.relation.each do |r|
82
+ same_edition = bib.relation.detect { |r2| same_edition?(r, r2) }
83
+ bib.relation << r unless same_edition
59
84
  end
60
- Dir[File.expand_path("../../data/*", __dir__)].each do |file|
61
- xml = File.read file, encoding: "UTF-8"
62
- save_doc BibXMLParser.parse(xml), warn_duplicate: false
63
- rescue StandardError => e
64
- warn "Error: document #{file} #{e.message}"
65
- warn e.backtrace.join("\n")
85
+ end
86
+
87
+ #
88
+ # Compare two relations
89
+ #
90
+ # @param [RelatonW3c::W3cBibliographicItem] rel1 relation 1
91
+ # @param [RelatonW3c::W3cBibliographicItem] rel2 relation 2
92
+ #
93
+ # @return [Boolean] true if relations are same
94
+ #
95
+ def same_edition?(rel1, rel2)
96
+ return false unless rel1.type == "hasEdition" && rel1.type == rel2.type
97
+
98
+ ids1 = rel1.bibitem.docidentifier.map(&:id)
99
+ ids2 = rel2.bibitem.docidentifier.map(&:id)
100
+ (ids1 & ids2).any?
101
+ end
102
+
103
+ #
104
+ # Yield fetching for each dataset
105
+ #
106
+ # @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
107
+ #
108
+ # @yield [RDF::Repository] RDF repository
109
+ #
110
+ def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
111
+ case source
112
+ when "w3c-tr-archive"
113
+ Dir["w3c-tr-archive/*.rdf"].map do |f|
114
+ @files = []
115
+ yield RDF::Repository.load(f)
116
+ end
117
+ when "w3c-rdf"
118
+ @files = []
119
+ rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
120
+ yield rdf
121
+ # parse_static_dataset
66
122
  end
67
- @index.sort!.save
68
123
  end
69
124
 
125
+ #
126
+ # Parse static dataset
127
+ #
128
+ # def parse_static_dataset
129
+ # Dir[File.expand_path("../../data/*", __dir__)].each do |file|
130
+ # xml = File.read file, encoding: "UTF-8"
131
+ # save_doc BibXMLParser.parse(xml), warn_duplicate: false
132
+ # rescue StandardError => e
133
+ # warn "Error: document #{file} #{e.message}"
134
+ # warn e.backtrace.join("\n")
135
+ # end
136
+ # end
137
+
70
138
  #
71
139
  # Query RDF source for versioned documents
72
140
  #
73
141
  # @return [RDF::Query::Solutions] query results
74
142
  #
75
- def query_versioned_docs # rubocop:disable Metrics/MethodLength
143
+ def query_versioned_docs(rdf)
76
144
  sse = SPARQL.parse(%(
77
145
  PREFIX : <http://www.w3.org/2001/02pd/rec54#>
78
146
  PREFIX dc: <http://purl.org/dc/elements/1.1/>
79
147
  PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
80
- # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
81
148
  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
82
- SELECT ?link ?title ?date ?version_of
83
- WHERE {
84
- ?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
85
- }
149
+ SELECT ?link ?title ?date
150
+ WHERE { ?link dc:title ?title ; dc:date ?date . }
86
151
  ))
87
- data.query sse
152
+ rdf.query sse
88
153
  end
89
154
 
90
155
  #
@@ -92,13 +157,16 @@ module RelatonW3c
92
157
  #
93
158
  # @return [Array<RDF::Query::Solution>] query results
94
159
  #
95
- def query_unversioned_docs
160
+ def query_unversioned_docs(rdf)
96
161
  sse = SPARQL.parse(%(
97
162
  PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
98
163
  SELECT ?version_of
99
- WHERE { ?x doc:versionOf ?version_of . }
164
+ WHERE {
165
+ ?link doc:versionOf ?version_of .
166
+ FILTER ( isURI(?link) && isURI(?version_of) && ?link != ?version_of )
167
+ }
100
168
  ))
101
- data.query(sse).uniq &:version_of
169
+ rdf.query(sse).uniq { |s| s.version_of.to_s.sub(/^https?:\/\//, "").sub(/\/$/, "") }
102
170
  end
103
171
 
104
172
  #