relaton-w3c 1.7.1 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aba37fe8aeba8328386ea5c4ce17b2bdbabd98a7608e2a942d61a83fb71b4d8a
4
- data.tar.gz: 7767bea526cfcc2a26399ba599e16e21f71b9683c30dc72570cf59b0ccbc7310
3
+ metadata.gz: ea91306625ad2f0f53980c20651589cd04de493874cbc209a91bafaec46fa9a2
4
+ data.tar.gz: '04608d998f36b0fc01cfd2a284e422bdc24120e91ea2d8f2e1e4a3dea44189dc'
5
5
  SHA512:
6
- metadata.gz: 47a182f49f54bb58a30878cd37fd2424bb17a9f69c975fb340a336b9799de1cf5a469345dd9a512251ec88255ec5f6ce34add530836294955858fd579062c8ef
7
- data.tar.gz: 0220edca26e5c550882d37f02206ebd512ecd82df040ebe1eb12010a5db8499b2a5110d50716689805093fa61df81d4070da08958c196e5666baebbd33045b30
6
+ metadata.gz: 03ce95e1ed894df67208a6be15d61f36a8ba186553bf99f458244b07ba68eebe7c4d5a9ed7ff0d4739f05452f8a6ccd541c0462a760cd99bf9ce53bb61360220
7
+ data.tar.gz: edb1f2b02e369e0056291e7c55a2d0115cd2f09e8ad1896e027a4fbe9928494b304099bb0cd9ae4dd9a71a11395b0b98d6f1235e2346ba86a06f59d5aa0b50ec
@@ -16,19 +16,9 @@ jobs:
16
16
  strategy:
17
17
  fail-fast: false
18
18
  matrix:
19
- ruby: [ '2.7', '2.6', '2.5', '2.4' ]
19
+ ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
20
  os: [ ubuntu-latest, windows-latest, macos-latest ]
21
21
  experimental: [ false ]
22
- include:
23
- - ruby: '3.0'
24
- os: 'ubuntu-latest'
25
- experimental: true
26
- - ruby: '3.0'
27
- os: 'windows-latest'
28
- experimental: true
29
- - ruby: '3.0'
30
- os: 'macos-latest'
31
- experimental: true
32
22
  steps:
33
23
  - uses: actions/checkout@v2
34
24
  with:
data/.rubocop.yml CHANGED
@@ -5,6 +5,6 @@
5
5
  inherit_from:
6
6
  - https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
7
7
  AllCops:
8
- TargetRubyVersion: 2.4
8
+ TargetRubyVersion: 2.5
9
9
  Rails:
10
10
  Enabled: false
data/Gemfile CHANGED
@@ -3,5 +3,5 @@ source "https://rubygems.org"
3
3
  # Specify your gem's dependencies in relaton_w3c.gemspec
4
4
  gemspec
5
5
 
6
- gem "rake", "~> 12.0"
6
+ gem "rake", "~> 13.0"
7
7
  gem "rspec", "~> 3.0"
data/README.adoc CHANGED
@@ -115,6 +115,16 @@ RelatonW3c::W3cBibliography.get "W3C WD JSON-LD 1.1 2019-10-18"
115
115
  ...
116
116
  ----
117
117
 
118
+ === Typed links
119
+
120
+ Each W3C document has a link of type `src`.
121
+
122
+ [source,ruby]
123
+ ----
124
+ item.link
125
+ => [#<RelatonBib::TypedUri:0x00007fc533ed4040 @content=#<Addressable::URI:0xcc22c URI:https://www.w3.org/TR/2020/REC-json-ld11-20200716/>, @type="src">]
126
+ ----
127
+
118
128
  === Create bibliographic item from XML
119
129
  [source,ruby]
120
130
  ----
@@ -141,6 +151,25 @@ RelatonW3c::W3cBibliographicItem.new bib_hash
141
151
  ...
142
152
  ----
143
153
 
154
+ === Fetch data
155
+
156
+ There is a W3C dataset http://www.w3.org/2002/01/tr-automation/tr.rdf which can be converted into RelatonXML/BibXML/BibYAML formats.
157
+
158
+ The method `RelatonW3c::DataFetcher.fetch(output: "data", format: "yaml")` converts all the documents from the dataset and saves them to the `./data` folder in YAML format.
159
+ Arguments:
160
+
161
+ - `output` - folder to save documents (default './data').
162
+ - `format` - format in which the documents are saved. Possible formats are: `yaml`, `xml`, `bibxml` (default `yaml`).
163
+
164
+ [source,ruby]
165
+ ----
166
+ RelatonW3c::DataFetcher.fetch
167
+ Started at: 2021-11-19 13:32:05 +0100
168
+ Stopped at: 2021-11-19 13:34:40 +0100
169
+ Done in: 155 sec.
170
+ => nil
171
+ ----
172
+
144
173
  == Development
145
174
 
146
175
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,9 +1,10 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
2
  <grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
3
- <include href="biblio.rng"/>
4
- <start>
5
- <ref name="document"/>
6
- </start>
3
+ <include href="biblio.rng">
4
+ <start>
5
+ <ref name="document"/>
6
+ </start>
7
+ </include>
7
8
  <define name="document">
8
9
  <element name="document">
9
10
  <optional>
@@ -43,13 +44,16 @@
43
44
  </attribute>
44
45
  </optional>
45
46
  <optional>
46
- <ref name="section-title"/>
47
+ <attribute name="language"/>
47
48
  </optional>
48
49
  <optional>
49
- <ref name="BasicBlock"/>
50
+ <attribute name="script"/>
51
+ </optional>
52
+ <optional>
53
+ <ref name="section-title"/>
50
54
  </optional>
51
55
  <zeroOrMore>
52
- <ref name="note"/>
56
+ <ref name="BasicBlock"/>
53
57
  </zeroOrMore>
54
58
  </define>
55
59
  <define name="references">
@@ -92,6 +96,10 @@
92
96
  <ref name="example"/>
93
97
  <ref name="review"/>
94
98
  <ref name="pre"/>
99
+ <ref name="note"/>
100
+ <ref name="pagebreak"/>
101
+ <ref name="hr"/>
102
+ <ref name="bookmark"/>
95
103
  </choice>
96
104
  </define>
97
105
  <define name="paragraph">
@@ -188,6 +196,14 @@
188
196
  <data type="boolean"/>
189
197
  </attribute>
190
198
  </optional>
199
+ <optional>
200
+ <attribute name="subsequence"/>
201
+ </optional>
202
+ <optional>
203
+ <attribute name="inequality">
204
+ <data type="boolean"/>
205
+ </attribute>
206
+ </optional>
191
207
  <ref name="stem"/>
192
208
  <optional>
193
209
  <ref name="dl"/>
@@ -241,6 +257,9 @@
241
257
  <data type="boolean"/>
242
258
  </attribute>
243
259
  </optional>
260
+ <optional>
261
+ <attribute name="subsequence"/>
262
+ </optional>
244
263
  <optional>
245
264
  <attribute name="lang"/>
246
265
  </optional>
@@ -266,6 +285,9 @@
266
285
  <attribute name="id">
267
286
  <data type="ID"/>
268
287
  </attribute>
288
+ <optional>
289
+ <attribute name="alt"/>
290
+ </optional>
269
291
  <optional>
270
292
  <ref name="tname"/>
271
293
  </optional>
@@ -285,9 +307,15 @@
285
307
  <data type="boolean"/>
286
308
  </attribute>
287
309
  </optional>
310
+ <optional>
311
+ <attribute name="subsequence"/>
312
+ </optional>
288
313
  <optional>
289
314
  <attribute name="alt"/>
290
315
  </optional>
316
+ <optional>
317
+ <attribute name="summary"/>
318
+ </optional>
291
319
  <optional>
292
320
  <attribute name="uri">
293
321
  <data type="anyURI"/>
@@ -313,12 +341,16 @@
313
341
  </define>
314
342
  <define name="tname">
315
343
  <element name="name">
316
- <text/>
317
- </element>
318
- </define>
319
- <define name="tclass">
320
- <element name="name">
321
- <text/>
344
+ <oneOrMore>
345
+ <choice>
346
+ <ref name="PureTextElement"/>
347
+ <ref name="eref"/>
348
+ <ref name="stem"/>
349
+ <ref name="keyword"/>
350
+ <ref name="xref"/>
351
+ <ref name="hyperlink"/>
352
+ </choice>
353
+ </oneOrMore>
322
354
  </element>
323
355
  </define>
324
356
  <define name="thead">
@@ -370,6 +402,16 @@
370
402
  </choice>
371
403
  </attribute>
372
404
  </optional>
405
+ <optional>
406
+ <attribute name="valign">
407
+ <choice>
408
+ <value>top</value>
409
+ <value>middle</value>
410
+ <value>bottom</value>
411
+ <value>baseline</value>
412
+ </choice>
413
+ </attribute>
414
+ </optional>
373
415
  <choice>
374
416
  <zeroOrMore>
375
417
  <ref name="TextElement"/>
@@ -397,6 +439,16 @@
397
439
  </choice>
398
440
  </attribute>
399
441
  </optional>
442
+ <optional>
443
+ <attribute name="valign">
444
+ <choice>
445
+ <value>top</value>
446
+ <value>middle</value>
447
+ <value>bottom</value>
448
+ <value>baseline</value>
449
+ </choice>
450
+ </attribute>
451
+ </optional>
400
452
  <choice>
401
453
  <zeroOrMore>
402
454
  <ref name="TextElement"/>
@@ -417,6 +469,12 @@
417
469
  <data type="boolean"/>
418
470
  </attribute>
419
471
  </optional>
472
+ <optional>
473
+ <attribute name="subsequence"/>
474
+ </optional>
475
+ <optional>
476
+ <ref name="tname"/>
477
+ </optional>
420
478
  <oneOrMore>
421
479
  <choice>
422
480
  <ref name="formula"/>
@@ -481,17 +539,25 @@
481
539
  </attribute>
482
540
  </optional>
483
541
  <optional>
484
- <ref name="source"/>
542
+ <attribute name="subsequence"/>
485
543
  </optional>
486
544
  <optional>
487
- <ref name="tname"/>
545
+ <attribute name="class"/>
488
546
  </optional>
489
547
  <optional>
490
- <ref name="tclass"/>
548
+ <ref name="source"/>
549
+ </optional>
550
+ <optional>
551
+ <ref name="tname"/>
491
552
  </optional>
492
553
  <choice>
493
554
  <ref name="image"/>
555
+ <ref name="video"/>
556
+ <ref name="audio"/>
494
557
  <ref name="pre"/>
558
+ <oneOrMore>
559
+ <ref name="paragraph-with-footnote"/>
560
+ </oneOrMore>
495
561
  <zeroOrMore>
496
562
  <ref name="figure"/>
497
563
  </zeroOrMore>
@@ -529,6 +595,8 @@
529
595
  <ref name="pagebreak"/>
530
596
  <ref name="bookmark"/>
531
597
  <ref name="image"/>
598
+ <ref name="index"/>
599
+ <ref name="index-xref"/>
532
600
  </choice>
533
601
  </define>
534
602
  <define name="PureTextElement">
@@ -553,14 +621,20 @@
553
621
  <define name="em">
554
622
  <element name="em">
555
623
  <zeroOrMore>
556
- <ref name="PureTextElement"/>
624
+ <choice>
625
+ <ref name="PureTextElement"/>
626
+ <ref name="stem"/>
627
+ </choice>
557
628
  </zeroOrMore>
558
629
  </element>
559
630
  </define>
560
631
  <define name="strong">
561
632
  <element name="strong">
562
633
  <zeroOrMore>
563
- <ref name="PureTextElement"/>
634
+ <choice>
635
+ <ref name="PureTextElement"/>
636
+ <ref name="stem"/>
637
+ </choice>
564
638
  </zeroOrMore>
565
639
  </element>
566
640
  </define>
@@ -653,6 +727,65 @@
653
727
  <empty/>
654
728
  </element>
655
729
  </define>
730
+ <define name="index">
731
+ <element name="index">
732
+ <optional>
733
+ <attribute name="to">
734
+ <data type="IDREF"/>
735
+ </attribute>
736
+ </optional>
737
+ <element name="primary">
738
+ <oneOrMore>
739
+ <ref name="PureTextElement"/>
740
+ </oneOrMore>
741
+ </element>
742
+ <optional>
743
+ <element name="secondary">
744
+ <oneOrMore>
745
+ <ref name="PureTextElement"/>
746
+ </oneOrMore>
747
+ </element>
748
+ </optional>
749
+ <optional>
750
+ <element name="tertiary">
751
+ <oneOrMore>
752
+ <ref name="PureTextElement"/>
753
+ </oneOrMore>
754
+ </element>
755
+ </optional>
756
+ </element>
757
+ </define>
758
+ <define name="index-xref">
759
+ <element name="index-xref">
760
+ <attribute name="also">
761
+ <data type="boolean"/>
762
+ </attribute>
763
+ <element name="primary">
764
+ <oneOrMore>
765
+ <ref name="PureTextElement"/>
766
+ </oneOrMore>
767
+ </element>
768
+ <optional>
769
+ <element name="secondary">
770
+ <oneOrMore>
771
+ <ref name="PureTextElement"/>
772
+ </oneOrMore>
773
+ </element>
774
+ </optional>
775
+ <optional>
776
+ <element name="tertiary">
777
+ <oneOrMore>
778
+ <ref name="PureTextElement"/>
779
+ </oneOrMore>
780
+ </element>
781
+ </optional>
782
+ <element name="target">
783
+ <oneOrMore>
784
+ <ref name="PureTextElement"/>
785
+ </oneOrMore>
786
+ </element>
787
+ </element>
788
+ </define>
656
789
  <!-- bare ID element, used for referencing arbitrary spans of text -->
657
790
  <define name="bookmark">
658
791
  <element name="bookmark">
@@ -778,6 +911,9 @@
778
911
  <optional>
779
912
  <attribute name="alt"/>
780
913
  </optional>
914
+ <optional>
915
+ <attribute name="title"/>
916
+ </optional>
781
917
  <optional>
782
918
  <attribute name="longdesc">
783
919
  <data type="anyURI"/>
@@ -786,7 +922,7 @@
786
922
  </element>
787
923
  </define>
788
924
  <define name="video">
789
- <element name="image">
925
+ <element name="video">
790
926
  <attribute name="id">
791
927
  <data type="ID"/>
792
928
  </attribute>
@@ -816,6 +952,9 @@
816
952
  <optional>
817
953
  <attribute name="alt"/>
818
954
  </optional>
955
+ <optional>
956
+ <attribute name="title"/>
957
+ </optional>
819
958
  <optional>
820
959
  <attribute name="longdesc">
821
960
  <data type="anyURI"/>
@@ -827,7 +966,7 @@
827
966
  </element>
828
967
  </define>
829
968
  <define name="audio">
830
- <element name="image">
969
+ <element name="audio">
831
970
  <attribute name="id">
832
971
  <data type="ID"/>
833
972
  </attribute>
@@ -841,6 +980,9 @@
841
980
  <optional>
842
981
  <attribute name="alt"/>
843
982
  </optional>
983
+ <optional>
984
+ <attribute name="title"/>
985
+ </optional>
844
986
  <optional>
845
987
  <attribute name="longdesc">
846
988
  <data type="anyURI"/>
@@ -925,6 +1067,9 @@
925
1067
  <value>alphabet_upper</value>
926
1068
  </choice>
927
1069
  </attribute>
1070
+ <optional>
1071
+ <attribute name="start"/>
1072
+ </optional>
928
1073
  <oneOrMore>
929
1074
  <ref name="li"/>
930
1075
  </oneOrMore>
data/grammars/biblio.rng CHANGED
@@ -124,7 +124,7 @@
124
124
  <value>application/tei+xml</value>
125
125
  <value>text/x-asciidoc</value>
126
126
  <value>text/markdown</value>
127
- <value>application/x-isodoc+xml</value>
127
+ <value>application/x-metanorma+xml</value>
128
128
  <text/>
129
129
  </choice>
130
130
  </attribute>
@@ -452,6 +452,7 @@
452
452
  <attribute name="type">
453
453
  <choice>
454
454
  <value>isni</value>
455
+ <value>orcid</value>
455
456
  <value>uri</value>
456
457
  </choice>
457
458
  </attribute>
@@ -461,10 +462,7 @@
461
462
  <define name="org-identifier">
462
463
  <element name="identifier">
463
464
  <attribute name="type">
464
- <choice>
465
- <value>orcid</value>
466
- <value>uri</value>
467
- </choice>
465
+ <data type="string" datatypeLibrary=""/>
468
466
  </attribute>
469
467
  <text/>
470
468
  </element>
@@ -789,6 +787,7 @@
789
787
  <value>adapted</value>
790
788
  <value>vote-started</value>
791
789
  <value>vote-ended</value>
790
+ <value>announced</value>
792
791
  </choice>
793
792
  </define>
794
793
  <define name="bdate">
@@ -1106,7 +1105,7 @@
1106
1105
  <value>complementOf</value>
1107
1106
  <value>obsoletes</value>
1108
1107
  <value>obsoletedBy</value>
1109
- <value>cited</value>
1108
+ <value>cites</value>
1110
1109
  <value>isCitedIn</value>
1111
1110
  </choice>
1112
1111
  </define>
@@ -0,0 +1,106 @@
1
+ require "rdf"
2
+ require "linkeddata"
3
+ require "sparql"
4
+ require "mechanize"
5
+ require "relaton_w3c/data_parser"
6
+
7
+ module RelatonW3c
8
+ class DataFetcher
9
+ USED_TYPES = %w[WD NOTE PER PR REC CR].freeze
10
+
11
+ attr_reader :data, :group_names
12
+
13
+ #
14
+ # Data fetcher initializer
15
+ #
16
+ # @param [String] output directory to save files
17
+ # @param [String] format format of output files (xml, yaml, bibxml)
18
+ #
19
+ def initialize(output, format)
20
+ @output = output
21
+ @format = format
22
+ @ext = format.sub(/^bib/, "")
23
+ dir = File.dirname(File.expand_path(__FILE__))
24
+ @group_names = YAML.load_file(File.join(dir , "workgroups.yaml"))
25
+ @data = RDF::Repository.load("http://www.w3.org/2002/01/tr-automation/tr.rdf")
26
+ @files = []
27
+ end
28
+
29
+ #
30
+ # Initialize fetcher and run fetch
31
+ #
32
+ # @param [String] output directory to save files, default: "data"
33
+ # @param [String] format format of output files (xml, yaml, bibxml), default: yaml
34
+ #
35
+ def self.fetch(output: "data", format: "yaml")
36
+ t1 = Time.now
37
+ puts "Started at: #{t1}"
38
+ FileUtils.mkdir_p output unless Dir.exist? output
39
+ new(output, format).fetch
40
+ t2 = Time.now
41
+ puts "Stopped at: #{t2}"
42
+ puts "Done in: #{(t2 - t1).round} sec."
43
+ end
44
+
45
+ #
46
+ # Parse documents
47
+ #
48
+ def fetch
49
+ query.each { |sl| save_doc DataParser.parse(sl, self) }
50
+ end
51
+
52
+ #
53
+ # Query RDF source for documents
54
+ #
55
+ # @return [RDF::Query::Solutions] query results
56
+ #
57
+ def query # rubocop:disable Metrics/MethodLength
58
+ sse = SPARQL.parse(%(
59
+ PREFIX : <http://www.w3.org/2001/02pd/rec54#>
60
+ PREFIX dc: <http://purl.org/dc/elements/1.1/>
61
+ PREFIX doc: <http://www.w3.org/2000/10/swap/pim/doc#>
62
+ # PREFIX mat: <http://www.w3.org/2002/05/matrix/vocab#>
63
+ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
64
+ SELECT ?link ?title ?date
65
+ WHERE {
66
+ ?link dc:title ?title ; dc:date ?date . # ; doc:versionOf ?version_of .
67
+ }
68
+ ))
69
+ data.query sse
70
+ end
71
+
72
+ #
73
+ # Save document to file
74
+ #
75
+ # @param [RelatonW3c::W3cBibliographicItem, nil] bib bibliographic item
76
+ #
77
+ def save_doc(bib) # rubocop:disable Metrics/MethodLength
78
+ return unless bib
79
+
80
+ c = case @format
81
+ when "xml" then bib.to_xml(bibdata: true)
82
+ when "yaml" then bib.to_hash.to_yaml
83
+ else bib.send("to_#{@format}")
84
+ end
85
+ file = file_name(bib)
86
+ if @files.include? file
87
+ warn "File #{file} already exists. Document: #{bib.docnumber}"
88
+ else
89
+ @files << file
90
+ end
91
+ File.write file, c, encoding: "UTF-8"
92
+ end
93
+
94
+ #
95
+ # Generate file name
96
+ #
97
+ # @param [RelatonW3c::W3cBibliographicItem] bib bibliographic item
98
+ #
99
+ # @return [String] file name
100
+ #
101
+ def file_name(bib)
102
+ name = bib.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
103
+ File.join @output, "#{name}.#{@ext}"
104
+ end
105
+ end
106
+ end