relaton-w3c 1.11.6 → 1.12.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.adoc +6 -3
- data/grammars/biblio.rng +172 -48
- data/grammars/isodoc.rng +77 -0
- data/lib/relaton_w3c/data_fetcher.rb +101 -33
- data/lib/relaton_w3c/data_index.rb +29 -14
- data/lib/relaton_w3c/data_parser.rb +47 -22
- data/lib/relaton_w3c/processor.rb +3 -3
- data/lib/relaton_w3c/version.rb +1 -1
- data/lib/relaton_w3c/workgroups.yaml +285 -148
- data/relaton_w3c.gemspec +1 -1
- metadata +4 -29
- data/data/reference.W3C.CR-rdf-schema.xml +0 -16
- data/data/reference.W3C.DSig-label.xml +0 -32
- data/data/reference.W3C.P3P-rdfschema.xml +0 -26
- data/data/reference.W3C.P3P.xml +0 -38
- data/data/reference.W3C.PICS-labels.xml +0 -43
- data/data/reference.W3C.PICS-rules.xml +0 -38
- data/data/reference.W3C.PICS-services.xml +0 -37
- data/data/reference.W3C.REC-RUBY.xml +0 -22
- data/data/reference.W3C.REC-XHTML.xml +0 -12
- data/data/reference.W3C.REC-rdf-syntax.xml +0 -31
- data/data/reference.W3C.REC-xml-1998.xml +0 -20
- data/data/reference.W3C.REC-xml-names.xml +0 -28
- data/data/reference.W3C.REC-xml.xml +0 -35
- data/data/reference.W3C.REC-xmlenc-core.xml +0 -23
- data/data/reference.W3C.REC-xmlschema-1.xml +0 -23
- data/data/reference.W3C.REC-xmlschema-2.xml +0 -17
- data/data/reference.W3C.daml-oil-reference.xml +0 -39
- data/data/reference.W3C.soap11.xml +0 -56
- data/data/reference.W3C.soap12-part1.xml +0 -38
- data/data/reference.W3C.soap12-part2.xml +0 -38
- data/data/reference.W3C.xkms.xml +0 -50
- data/data/reference.W3C.xml-c14n.xml +0 -15
- data/data/reference.W3C.xmldsig-core.xml +0 -26
- data/data/reference.W3C.xmlenc-core.xml +0 -20
- data/data/reference.W3C.xpath.xml +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0bf7e3fdd349e94b272db7045c88bae46fe32f642436527486ff4db8abd08518
|
4
|
+
data.tar.gz: d66405c0f27950d42f1ceac65d3be95a7f93e19ff5045d4c4618ddb551d819b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fb14cc520b1fbe3f5bc0dfd387b990b1693e5e7ab8c15d093372cf8ebe5f11ae4df521c5d05885da83ef60b3fe475dedaeb3dc764a9aaad22f9288de50307ede
|
7
|
+
data.tar.gz: 5d36eea643697240c15bc24abcc6b03414731fd1aed4dc48f4e86f32c7feba6cfa99e3e6f1503882b241414875534a480f4e3c215a39b9bccb60bf0d3c798df5
|
data/README.adoc
CHANGED
@@ -151,14 +151,17 @@ RelatonW3c::W3cBibliographicItem.new **bib_hash
|
|
151
151
|
|
152
152
|
=== Fetch data
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
The method `RelatonW3c::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the dataset and save them to the `./data` folder in YAML format.
|
154
|
+
The method `RelatonW3c::DataFetcher.fetch(source, output: "data", format: "yaml")` converts all the documents from the dataset and saves them to the `./data` folder in YAML format.
|
157
155
|
Arguments:
|
158
156
|
|
157
|
+
- `source` - name of dataset (`w3c-rdf` or `w3c-tr-archive`)
|
159
158
|
- `output` - folder to save documents (default './data').
|
160
159
|
- `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
|
161
160
|
|
161
|
+
The available datasets are:
|
162
|
+
- `w3c-rdf` - The dataset is fetched from http://www.w3.org/2002/01/tr-automation/tr.rdf.
|
163
|
+
- `w3c-tr-archive` - The archive dataset files should be downloaded from the https://github.com/relaton/w3c-tr-archive repository and placed into the `w3c-tr-archive` folder.
|
164
|
+
|
162
165
|
[source,ruby]
|
163
166
|
----
|
164
167
|
RelatonW3c::DataFetcher.fetch
|
data/grammars/biblio.rng
CHANGED
@@ -527,7 +527,7 @@
|
|
527
527
|
</define>
|
528
528
|
<define name="LocalityType">
|
529
529
|
<data type="string">
|
530
|
-
<param name="pattern">section|clause|part|paragraph|chapter|page|whole|table|annex|figure|note|list|example|volume|issue|time|anchor|locality:[a-zA-Z0-9_]+</param>
|
530
|
+
<param name="pattern">section|clause|part|paragraph|chapter|page|title|line|whole|table|annex|figure|note|list|example|volume|issue|time|anchor|locality:[a-zA-Z0-9_]+</param>
|
531
531
|
</data>
|
532
532
|
</define>
|
533
533
|
<define name="referenceFrom">
|
@@ -614,12 +614,103 @@
|
|
614
614
|
<optional>
|
615
615
|
<ref name="fetched"/>
|
616
616
|
</optional>
|
617
|
-
<
|
618
|
-
<oneOrMore>
|
619
|
-
<ref name="btitle"/>
|
620
|
-
</oneOrMore>
|
617
|
+
<optional>
|
621
618
|
<ref name="formattedref"/>
|
622
|
-
</
|
619
|
+
</optional>
|
620
|
+
<oneOrMore>
|
621
|
+
<ref name="btitle"/>
|
622
|
+
</oneOrMore>
|
623
|
+
<zeroOrMore>
|
624
|
+
<ref name="bsource"/>
|
625
|
+
</zeroOrMore>
|
626
|
+
<oneOrMore>
|
627
|
+
<ref name="docidentifier"/>
|
628
|
+
</oneOrMore>
|
629
|
+
<optional>
|
630
|
+
<ref name="docnumber"/>
|
631
|
+
</optional>
|
632
|
+
<zeroOrMore>
|
633
|
+
<ref name="bdate"/>
|
634
|
+
</zeroOrMore>
|
635
|
+
<zeroOrMore>
|
636
|
+
<ref name="contributor"/>
|
637
|
+
</zeroOrMore>
|
638
|
+
<optional>
|
639
|
+
<ref name="edition"/>
|
640
|
+
</optional>
|
641
|
+
<zeroOrMore>
|
642
|
+
<ref name="version"/>
|
643
|
+
</zeroOrMore>
|
644
|
+
<zeroOrMore>
|
645
|
+
<ref name="biblionote"/>
|
646
|
+
</zeroOrMore>
|
647
|
+
<zeroOrMore>
|
648
|
+
<ref name="language"/>
|
649
|
+
</zeroOrMore>
|
650
|
+
<zeroOrMore>
|
651
|
+
<ref name="script"/>
|
652
|
+
</zeroOrMore>
|
653
|
+
<zeroOrMore>
|
654
|
+
<ref name="bibabstract"/>
|
655
|
+
</zeroOrMore>
|
656
|
+
<optional>
|
657
|
+
<ref name="status"/>
|
658
|
+
</optional>
|
659
|
+
<zeroOrMore>
|
660
|
+
<ref name="copyright"/>
|
661
|
+
</zeroOrMore>
|
662
|
+
<zeroOrMore>
|
663
|
+
<ref name="docrelation"/>
|
664
|
+
</zeroOrMore>
|
665
|
+
<zeroOrMore>
|
666
|
+
<ref name="series"/>
|
667
|
+
</zeroOrMore>
|
668
|
+
<optional>
|
669
|
+
<ref name="medium"/>
|
670
|
+
</optional>
|
671
|
+
<zeroOrMore>
|
672
|
+
<ref name="bplace"/>
|
673
|
+
</zeroOrMore>
|
674
|
+
<zeroOrMore>
|
675
|
+
<ref name="bprice"/>
|
676
|
+
</zeroOrMore>
|
677
|
+
<zeroOrMore>
|
678
|
+
<ref name="extent"/>
|
679
|
+
</zeroOrMore>
|
680
|
+
<optional>
|
681
|
+
<ref name="bibliographic_size"/>
|
682
|
+
</optional>
|
683
|
+
<zeroOrMore>
|
684
|
+
<ref name="accesslocation"/>
|
685
|
+
</zeroOrMore>
|
686
|
+
<zeroOrMore>
|
687
|
+
<ref name="license"/>
|
688
|
+
</zeroOrMore>
|
689
|
+
<zeroOrMore>
|
690
|
+
<ref name="bclassification"/>
|
691
|
+
</zeroOrMore>
|
692
|
+
<zeroOrMore>
|
693
|
+
<ref name="bkeyword"/>
|
694
|
+
</zeroOrMore>
|
695
|
+
<optional>
|
696
|
+
<ref name="validity"/>
|
697
|
+
</optional>
|
698
|
+
</define>
|
699
|
+
<define name="ReducedBibliographicItem">
|
700
|
+
<optional>
|
701
|
+
<attribute name="type">
|
702
|
+
<ref name="BibItemType"/>
|
703
|
+
</attribute>
|
704
|
+
</optional>
|
705
|
+
<optional>
|
706
|
+
<ref name="fetched"/>
|
707
|
+
</optional>
|
708
|
+
<optional>
|
709
|
+
<ref name="formattedref"/>
|
710
|
+
</optional>
|
711
|
+
<zeroOrMore>
|
712
|
+
<ref name="btitle"/>
|
713
|
+
</zeroOrMore>
|
623
714
|
<zeroOrMore>
|
624
715
|
<ref name="bsource"/>
|
625
716
|
</zeroOrMore>
|
@@ -638,9 +729,9 @@
|
|
638
729
|
<optional>
|
639
730
|
<ref name="edition"/>
|
640
731
|
</optional>
|
641
|
-
<
|
732
|
+
<zeroOrMore>
|
642
733
|
<ref name="version"/>
|
643
|
-
</
|
734
|
+
</zeroOrMore>
|
644
735
|
<zeroOrMore>
|
645
736
|
<ref name="biblionote"/>
|
646
737
|
</zeroOrMore>
|
@@ -833,6 +924,12 @@
|
|
833
924
|
<data type="boolean"/>
|
834
925
|
</attribute>
|
835
926
|
</optional>
|
927
|
+
<optional>
|
928
|
+
<attribute name="language"/>
|
929
|
+
</optional>
|
930
|
+
<optional>
|
931
|
+
<attribute name="script"/>
|
932
|
+
</optional>
|
836
933
|
<text/>
|
837
934
|
</element>
|
838
935
|
</define>
|
@@ -851,17 +948,46 @@
|
|
851
948
|
</define>
|
852
949
|
<define name="bplace">
|
853
950
|
<element name="place">
|
854
|
-
<
|
855
|
-
<
|
856
|
-
|
857
|
-
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
951
|
+
<choice>
|
952
|
+
<text/>
|
953
|
+
<group>
|
954
|
+
<ref name="bibliocity"/>
|
955
|
+
<zeroOrMore>
|
956
|
+
<ref name="biblioregion"/>
|
957
|
+
</zeroOrMore>
|
958
|
+
<zeroOrMore>
|
959
|
+
<ref name="bibliocountry"/>
|
960
|
+
</zeroOrMore>
|
961
|
+
</group>
|
962
|
+
</choice>
|
963
|
+
</element>
|
964
|
+
</define>
|
965
|
+
<define name="bibliocity">
|
966
|
+
<element name="city">
|
862
967
|
<text/>
|
863
968
|
</element>
|
864
969
|
</define>
|
970
|
+
<define name="biblioregion">
|
971
|
+
<element name="region">
|
972
|
+
<ref name="RegionType"/>
|
973
|
+
</element>
|
974
|
+
</define>
|
975
|
+
<define name="bibliocountry">
|
976
|
+
<element name="country">
|
977
|
+
<ref name="RegionType"/>
|
978
|
+
</element>
|
979
|
+
</define>
|
980
|
+
<define name="RegionType">
|
981
|
+
<optional>
|
982
|
+
<attribute name="iso"/>
|
983
|
+
</optional>
|
984
|
+
<optional>
|
985
|
+
<attribute name="recommended">
|
986
|
+
<data type="boolean"/>
|
987
|
+
</attribute>
|
988
|
+
</optional>
|
989
|
+
<text/>
|
990
|
+
</define>
|
865
991
|
<define name="bprice">
|
866
992
|
<element name="price">
|
867
993
|
<attribute name="currency"/>
|
@@ -957,36 +1083,34 @@
|
|
957
1083
|
<ref name="SeriesType"/>
|
958
1084
|
</attribute>
|
959
1085
|
</optional>
|
960
|
-
<
|
1086
|
+
<optional>
|
961
1087
|
<ref name="formattedref"/>
|
962
|
-
|
963
|
-
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
|
988
|
-
</group>
|
989
|
-
</choice>
|
1088
|
+
</optional>
|
1089
|
+
<ref name="btitle"/>
|
1090
|
+
<optional>
|
1091
|
+
<ref name="bplace"/>
|
1092
|
+
</optional>
|
1093
|
+
<optional>
|
1094
|
+
<ref name="seriesorganization"/>
|
1095
|
+
</optional>
|
1096
|
+
<optional>
|
1097
|
+
<ref name="abbreviation"/>
|
1098
|
+
</optional>
|
1099
|
+
<optional>
|
1100
|
+
<ref name="seriesfrom"/>
|
1101
|
+
</optional>
|
1102
|
+
<optional>
|
1103
|
+
<ref name="seriesto"/>
|
1104
|
+
</optional>
|
1105
|
+
<optional>
|
1106
|
+
<ref name="seriesnumber"/>
|
1107
|
+
</optional>
|
1108
|
+
<optional>
|
1109
|
+
<ref name="seriespartnumber"/>
|
1110
|
+
</optional>
|
1111
|
+
<optional>
|
1112
|
+
<ref name="seriesrun"/>
|
1113
|
+
</optional>
|
990
1114
|
</element>
|
991
1115
|
</define>
|
992
1116
|
<define name="SeriesType">
|
@@ -1145,7 +1269,7 @@
|
|
1145
1269
|
</element>
|
1146
1270
|
</optional>
|
1147
1271
|
<element name="bibitem">
|
1148
|
-
<ref name="
|
1272
|
+
<ref name="ReducedBibliographicItem"/>
|
1149
1273
|
</element>
|
1150
1274
|
<choice>
|
1151
1275
|
<zeroOrMore>
|
@@ -1170,9 +1294,9 @@
|
|
1170
1294
|
<optional>
|
1171
1295
|
<ref name="revision-date"/>
|
1172
1296
|
</optional>
|
1173
|
-
<
|
1297
|
+
<optional>
|
1174
1298
|
<ref name="draft"/>
|
1175
|
-
</
|
1299
|
+
</optional>
|
1176
1300
|
</element>
|
1177
1301
|
</define>
|
1178
1302
|
<define name="vedition">
|
data/grammars/isodoc.rng
CHANGED
@@ -32,6 +32,43 @@
|
|
32
32
|
<ref name="DocumentType"/>
|
33
33
|
</element>
|
34
34
|
</define>
|
35
|
+
<define name="admonition">
|
36
|
+
<element name="admonition">
|
37
|
+
<attribute name="type">
|
38
|
+
<ref name="AdmonitionType"/>
|
39
|
+
</attribute>
|
40
|
+
<optional>
|
41
|
+
<attribute name="class"/>
|
42
|
+
</optional>
|
43
|
+
<attribute name="id">
|
44
|
+
<data type="ID"/>
|
45
|
+
</attribute>
|
46
|
+
<optional>
|
47
|
+
<attribute name="uri">
|
48
|
+
<data type="anyURI"/>
|
49
|
+
</attribute>
|
50
|
+
</optional>
|
51
|
+
<optional>
|
52
|
+
<attribute name="coverpage">
|
53
|
+
<data type="boolean"/>
|
54
|
+
</attribute>
|
55
|
+
</optional>
|
56
|
+
<optional>
|
57
|
+
<attribute name="notag">
|
58
|
+
<data type="boolean"/>
|
59
|
+
</attribute>
|
60
|
+
</optional>
|
61
|
+
<optional>
|
62
|
+
<ref name="tname"/>
|
63
|
+
</optional>
|
64
|
+
<zeroOrMore>
|
65
|
+
<ref name="paragraph-with-footnote"/>
|
66
|
+
</zeroOrMore>
|
67
|
+
<zeroOrMore>
|
68
|
+
<ref name="note"/>
|
69
|
+
</zeroOrMore>
|
70
|
+
</element>
|
71
|
+
</define>
|
35
72
|
<define name="index">
|
36
73
|
<element name="index">
|
37
74
|
<optional>
|
@@ -79,6 +116,11 @@
|
|
79
116
|
<data type="boolean"/>
|
80
117
|
</attribute>
|
81
118
|
</optional>
|
119
|
+
<optional>
|
120
|
+
<attribute name="suppress_identifier">
|
121
|
+
<data type="boolean"/>
|
122
|
+
</attribute>
|
123
|
+
</optional>
|
82
124
|
<ref name="BibliographicItem"/>
|
83
125
|
</element>
|
84
126
|
</define>
|
@@ -245,6 +287,9 @@
|
|
245
287
|
<ref name="MultilingualRenderingType"/>
|
246
288
|
</attribute>
|
247
289
|
</optional>
|
290
|
+
<optional>
|
291
|
+
<ref name="tname"/>
|
292
|
+
</optional>
|
248
293
|
<oneOrMore>
|
249
294
|
<ref name="ul_li"/>
|
250
295
|
</oneOrMore>
|
@@ -287,6 +332,9 @@
|
|
287
332
|
</choice>
|
288
333
|
</attribute>
|
289
334
|
</optional>
|
335
|
+
<optional>
|
336
|
+
<ref name="tname"/>
|
337
|
+
</optional>
|
290
338
|
<oneOrMore>
|
291
339
|
<ref name="li"/>
|
292
340
|
</oneOrMore>
|
@@ -323,6 +371,9 @@
|
|
323
371
|
<ref name="MultilingualRenderingType"/>
|
324
372
|
</attribute>
|
325
373
|
</optional>
|
374
|
+
<optional>
|
375
|
+
<ref name="tname"/>
|
376
|
+
</optional>
|
326
377
|
<oneOrMore>
|
327
378
|
<ref name="dt"/>
|
328
379
|
<ref name="dd"/>
|
@@ -657,6 +708,9 @@
|
|
657
708
|
<optional>
|
658
709
|
<attribute name="tag"/>
|
659
710
|
</optional>
|
711
|
+
<optional>
|
712
|
+
<attribute name="type"/>
|
713
|
+
</optional>
|
660
714
|
<optional>
|
661
715
|
<attribute name="multilingual-rendering">
|
662
716
|
<ref name="MultilingualRenderingType"/>
|
@@ -692,6 +746,9 @@
|
|
692
746
|
<optional>
|
693
747
|
<attribute name="tag"/>
|
694
748
|
</optional>
|
749
|
+
<optional>
|
750
|
+
<attribute name="type"/>
|
751
|
+
</optional>
|
695
752
|
<optional>
|
696
753
|
<attribute name="multilingual-rendering">
|
697
754
|
<ref name="MultilingualRenderingType"/>
|
@@ -851,6 +908,16 @@
|
|
851
908
|
<ref name="MultilingualRenderingType"/>
|
852
909
|
</attribute>
|
853
910
|
</optional>
|
911
|
+
<optional>
|
912
|
+
<attribute name="coverpage">
|
913
|
+
<data type="boolean"/>
|
914
|
+
</attribute>
|
915
|
+
</optional>
|
916
|
+
<optional>
|
917
|
+
<attribute name="notag">
|
918
|
+
<data type="boolean"/>
|
919
|
+
</attribute>
|
920
|
+
</optional>
|
854
921
|
<oneOrMore>
|
855
922
|
<choice>
|
856
923
|
<ref name="paragraph"/>
|
@@ -1178,6 +1245,7 @@
|
|
1178
1245
|
<ref name="concept"/>
|
1179
1246
|
<ref name="add"/>
|
1180
1247
|
<ref name="del"/>
|
1248
|
+
<ref name="span"/>
|
1181
1249
|
</choice>
|
1182
1250
|
</define>
|
1183
1251
|
<define name="add">
|
@@ -1204,6 +1272,14 @@
|
|
1204
1272
|
</choice>
|
1205
1273
|
</element>
|
1206
1274
|
</define>
|
1275
|
+
<define name="span">
|
1276
|
+
<element name="span">
|
1277
|
+
<attribute name="class"/>
|
1278
|
+
<oneOrMore>
|
1279
|
+
<ref name="TextElement"/>
|
1280
|
+
</oneOrMore>
|
1281
|
+
</element>
|
1282
|
+
</define>
|
1207
1283
|
<define name="concept">
|
1208
1284
|
<element name="concept">
|
1209
1285
|
<optional>
|
@@ -1994,6 +2070,7 @@
|
|
1994
2070
|
<value>compare</value>
|
1995
2071
|
<value>contrast</value>
|
1996
2072
|
<value>see</value>
|
2073
|
+
<value>seealso</value>
|
1997
2074
|
</choice>
|
1998
2075
|
</define>
|
1999
2076
|
<define name="deprecates">
|
@@ -20,22 +20,21 @@ module RelatonW3c
|
|
20
20
|
@ext = format.sub(/^bib/, "")
|
21
21
|
dir = File.dirname(File.expand_path(__FILE__))
|
22
22
|
@group_names = YAML.load_file(File.join(dir, "workgroups.yaml"))
|
23
|
-
@data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
24
|
-
@files = []
|
25
23
|
@index = DataIndex.new
|
26
24
|
end
|
27
25
|
|
28
26
|
#
|
29
27
|
# Initialize fetcher and run fetch
|
30
28
|
#
|
29
|
+
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
31
30
|
# @param [String] output directory to save files, default: "data"
|
32
31
|
# @param [String] format format of output files (xml, yaml, bibxml), default: yaml
|
33
32
|
#
|
34
|
-
def self.fetch(output: "data", format: "yaml")
|
33
|
+
def self.fetch(source, output: "data", format: "yaml")
|
35
34
|
t1 = Time.now
|
36
35
|
puts "Started at: #{t1}"
|
37
|
-
FileUtils.mkdir_p output
|
38
|
-
new(output, format).fetch
|
36
|
+
FileUtils.mkdir_p output
|
37
|
+
new(output, format).fetch source
|
39
38
|
t2 = Time.now
|
40
39
|
puts "Stopped at: #{t2}"
|
41
40
|
puts "Done in: #{(t2 - t1).round} sec."
|
@@ -44,47 +43,113 @@ module RelatonW3c
|
|
44
43
|
#
|
45
44
|
# Parse documents
|
46
45
|
#
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
46
|
+
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
47
|
+
#
|
48
|
+
def fetch(source) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
49
|
+
each_dataset(source) do |rdf|
|
50
|
+
%i[versioned unversioned].each do |type|
|
51
|
+
send("query_#{type}_docs", rdf).each do |sl|
|
52
|
+
bib = DataParser.parse(rdf, sl, self)
|
53
|
+
add_has_edition_relation(bib) if type == :unversioned
|
54
|
+
save_doc bib
|
55
|
+
rescue StandardError => e
|
56
|
+
link = sl.respond_to?(:link) ? sl.link : sl.version_of
|
57
|
+
warn "Error: document #{link} #{e.message}"
|
58
|
+
warn e.backtrace.join("\n")
|
59
|
+
end
|
60
|
+
end
|
53
61
|
end
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
62
|
+
@index.sort!.save
|
63
|
+
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# Add hasEdition relations from previously parsed document
|
67
|
+
#
|
68
|
+
# @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
|
69
|
+
#
|
70
|
+
def add_has_edition_relation(bib) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity
|
71
|
+
file = file_name bib.docnumber
|
72
|
+
return unless File.exist? file
|
73
|
+
|
74
|
+
b = case @format
|
75
|
+
when "xml" then XMLParser.from_xml(File.read(file, encoding: "UTF-8"))
|
76
|
+
when "yaml"
|
77
|
+
hash = YAML.load_file(file)
|
78
|
+
W3cBibliographicItem.from_hash(hash)
|
79
|
+
when "bibxml" then BibXMLParser.parse File.read(file, encoding: "UTF-8")
|
80
|
+
end
|
81
|
+
b.relation.each do |r|
|
82
|
+
same_edition = bib.relation.detect { |r2| same_edition?(r, r2) }
|
83
|
+
bib.relation << r unless same_edition
|
59
84
|
end
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
85
|
+
end
|
86
|
+
|
87
|
+
#
|
88
|
+
# Compare two relations
|
89
|
+
#
|
90
|
+
# @param [RelatonW3c::W3cBibliographicItem] rel1 relation 1
|
91
|
+
# @param [RelatonW3c::W3cBibliographicItem] rel2 relation 2
|
92
|
+
#
|
93
|
+
# @return [Boolean] true if relations are same
|
94
|
+
#
|
95
|
+
def same_edition?(rel1, rel2)
|
96
|
+
return false unless rel1.type == "hasEdition" && rel1.type == rel2.type
|
97
|
+
|
98
|
+
ids1 = rel1.bibitem.docidentifier.map(&:id)
|
99
|
+
ids2 = rel2.bibitem.docidentifier.map(&:id)
|
100
|
+
(ids1 & ids2).any?
|
101
|
+
end
|
102
|
+
|
103
|
+
#
|
104
|
+
# Yield fetching for each dataset
|
105
|
+
#
|
106
|
+
# @param [String] source source name "w3c-tr-archive" or "w3c-rdf"
|
107
|
+
#
|
108
|
+
# @yield [RDF::Repository] RDF repository
|
109
|
+
#
|
110
|
+
def each_dataset(source, &_block) # rubocop:disable Metrics/MethodLength
|
111
|
+
case source
|
112
|
+
when "w3c-tr-archive"
|
113
|
+
Dir["w3c-tr-archive/*.rdf"].map do |f|
|
114
|
+
@files = []
|
115
|
+
yield RDF::Repository.load(f)
|
116
|
+
end
|
117
|
+
when "w3c-rdf"
|
118
|
+
@files = []
|
119
|
+
rdf = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
|
120
|
+
yield rdf
|
121
|
+
# parse_static_dataset
|
66
122
|
end
|
67
|
-
@index.sort!.save
|
68
123
|
end
|
69
124
|
|
125
|
+
#
|
126
|
+
# Parse static dataset
|
127
|
+
#
|
128
|
+
# def parse_static_dataset
|
129
|
+
# Dir[File.expand_path("../../data/*", __dir__)].each do |file|
|
130
|
+
# xml = File.read file, encoding: "UTF-8"
|
131
|
+
# save_doc BibXMLParser.parse(xml), warn_duplicate: false
|
132
|
+
# rescue StandardError => e
|
133
|
+
# warn "Error: document #{file} #{e.message}"
|
134
|
+
# warn e.backtrace.join("\n")
|
135
|
+
# end
|
136
|
+
# end
|
137
|
+
|
70
138
|
#
|
71
139
|
# Query RDF source for versioned documents
|
72
140
|
#
|
73
141
|
# @return [RDF::Query::Solutions] query results
|
74
142
|
#
|
75
|
-
def query_versioned_docs
|
143
|
+
def query_versioned_docs(rdf)
|
76
144
|
sse = SPARQL.parse(%(
|
77
145
|
PREFIX : <http://www.w3.org/2001/02pd/rec54#>
|
78
146
|
PREFIX dc: <http://purl.org/dc/elements/1.1/>
|
79
147
|
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
80
|
-
# PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
|
81
148
|
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
|
82
|
-
SELECT ?link ?title ?date
|
83
|
-
WHERE {
|
84
|
-
?link dc:title ?title ; dc:date ?date ; doc:versionOf ?version_of .
|
85
|
-
}
|
149
|
+
SELECT ?link ?title ?date
|
150
|
+
WHERE { ?link dc:title ?title ; dc:date ?date . }
|
86
151
|
))
|
87
|
-
|
152
|
+
rdf.query sse
|
88
153
|
end
|
89
154
|
|
90
155
|
#
|
@@ -92,13 +157,16 @@ module RelatonW3c
|
|
92
157
|
#
|
93
158
|
# @return [Array<RDF::Query::Solution>] query results
|
94
159
|
#
|
95
|
-
def query_unversioned_docs
|
160
|
+
def query_unversioned_docs(rdf)
|
96
161
|
sse = SPARQL.parse(%(
|
97
162
|
PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
|
98
163
|
SELECT ?version_of
|
99
|
-
WHERE {
|
164
|
+
WHERE {
|
165
|
+
?link doc:versionOf ?version_of .
|
166
|
+
FILTER ( isURI(?link) && isURI(?version_of) && ?link != ?version_of )
|
167
|
+
}
|
100
168
|
))
|
101
|
-
|
169
|
+
rdf.query(sse).uniq { |s| s.version_of.to_s.sub(/^https?:\/\//, "").sub(/\/$/, "") }
|
102
170
|
end
|
103
171
|
|
104
172
|
#
|