relaton-gb 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/grammars/biblio.rng +1 -1
- data/grammars/isodoc.rng +427 -0
- data/grammars/isostandard.rng +3 -0
- data/lib/relaton_gb.rb +1 -1
- data/lib/relaton_gb/gb_bibliographic_item.rb +2 -26
- data/lib/relaton_gb/gb_scrapper.rb +1 -1
- data/lib/relaton_gb/hash_converter.rb +9 -0
- data/lib/relaton_gb/hit.rb +1 -1
- data/lib/relaton_gb/hit_collection.rb +3 -2
- data/lib/relaton_gb/scrapper.rb +7 -18
- data/lib/relaton_gb/sec_scrapper.rb +5 -3
- data/lib/relaton_gb/t_scrapper.rb +7 -10
- data/lib/relaton_gb/version.rb +1 -1
- data/lib/relaton_gb/xml_parser.rb +7 -15
- data/relaton_gb.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a2def68ed449438b4718929f3eac94284baa6be65232dc0f16564dc6f192f9a
|
4
|
+
data.tar.gz: 31deeb1c5d825866c432c67863ba55d4e55630ad3867df8e7adb2f5ae8045fbc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b120c1edf6fcc8658a3dacca1806d26913c96399b2ef22e0cdc3c24079d38af7b9be633860f6e421f734d29851578675f29bd4cc69e7f97c7a2c7ce0a94bf9f
|
7
|
+
data.tar.gz: f546268018b661693313f3821e0a9dd86cec929b85ee1d60e3bb93ced58e95ebab9ba172474699a8fcafeb6804487071551a465c76c0741f69a4933bcad3c997
|
data/grammars/biblio.rng
CHANGED
@@ -532,7 +532,7 @@
|
|
532
532
|
</define>
|
533
533
|
<define name="LocalityType">
|
534
534
|
<data type="string">
|
535
|
-
<param name="pattern">section|clause|part|paragraph|chapter|page|whole|table|annex|figure|note|list|example|volume|issue|time|locality:[a-zA-Z0-9_]+</param>
|
535
|
+
<param name="pattern">section|clause|part|paragraph|chapter|page|whole|table|annex|figure|note|list|example|volume|issue|time|anchor|locality:[a-zA-Z0-9_]+</param>
|
536
536
|
</data>
|
537
537
|
</define>
|
538
538
|
<define name="referenceFrom">
|
data/grammars/isodoc.rng
CHANGED
@@ -64,6 +64,85 @@
|
|
64
64
|
<text/>
|
65
65
|
</element>
|
66
66
|
</define>
|
67
|
+
<define name="ul">
|
68
|
+
<element name="ul">
|
69
|
+
<attribute name="id">
|
70
|
+
<data type="ID"/>
|
71
|
+
</attribute>
|
72
|
+
<optional>
|
73
|
+
<attribute name="keep-with-next">
|
74
|
+
<data type="boolean"/>
|
75
|
+
</attribute>
|
76
|
+
</optional>
|
77
|
+
<optional>
|
78
|
+
<attribute name="keep-lines-together">
|
79
|
+
<data type="boolean"/>
|
80
|
+
</attribute>
|
81
|
+
</optional>
|
82
|
+
<oneOrMore>
|
83
|
+
<ref name="li"/>
|
84
|
+
</oneOrMore>
|
85
|
+
<zeroOrMore>
|
86
|
+
<ref name="note"/>
|
87
|
+
</zeroOrMore>
|
88
|
+
</element>
|
89
|
+
</define>
|
90
|
+
<define name="ol">
|
91
|
+
<element name="ol">
|
92
|
+
<attribute name="id">
|
93
|
+
<data type="ID"/>
|
94
|
+
</attribute>
|
95
|
+
<optional>
|
96
|
+
<attribute name="keep-with-next">
|
97
|
+
<data type="boolean"/>
|
98
|
+
</attribute>
|
99
|
+
</optional>
|
100
|
+
<optional>
|
101
|
+
<attribute name="keep-lines-together">
|
102
|
+
<data type="boolean"/>
|
103
|
+
</attribute>
|
104
|
+
</optional>
|
105
|
+
<attribute name="type">
|
106
|
+
<choice>
|
107
|
+
<value>roman</value>
|
108
|
+
<value>alphabet</value>
|
109
|
+
<value>arabic</value>
|
110
|
+
<value>roman_upper</value>
|
111
|
+
<value>alphabet_upper</value>
|
112
|
+
</choice>
|
113
|
+
</attribute>
|
114
|
+
<oneOrMore>
|
115
|
+
<ref name="li"/>
|
116
|
+
</oneOrMore>
|
117
|
+
<zeroOrMore>
|
118
|
+
<ref name="note"/>
|
119
|
+
</zeroOrMore>
|
120
|
+
</element>
|
121
|
+
</define>
|
122
|
+
<define name="dl">
|
123
|
+
<element name="dl">
|
124
|
+
<attribute name="id">
|
125
|
+
<data type="ID"/>
|
126
|
+
</attribute>
|
127
|
+
<optional>
|
128
|
+
<attribute name="keep-with-next">
|
129
|
+
<data type="boolean"/>
|
130
|
+
</attribute>
|
131
|
+
</optional>
|
132
|
+
<optional>
|
133
|
+
<attribute name="keep-lines-together">
|
134
|
+
<data type="boolean"/>
|
135
|
+
</attribute>
|
136
|
+
</optional>
|
137
|
+
<oneOrMore>
|
138
|
+
<ref name="dt"/>
|
139
|
+
<ref name="dd"/>
|
140
|
+
</oneOrMore>
|
141
|
+
<zeroOrMore>
|
142
|
+
<ref name="note"/>
|
143
|
+
</zeroOrMore>
|
144
|
+
</element>
|
145
|
+
</define>
|
67
146
|
<define name="example">
|
68
147
|
<element name="example">
|
69
148
|
<attribute name="id">
|
@@ -77,6 +156,19 @@
|
|
77
156
|
<optional>
|
78
157
|
<attribute name="subsequence"/>
|
79
158
|
</optional>
|
159
|
+
<optional>
|
160
|
+
<attribute name="number"/>
|
161
|
+
</optional>
|
162
|
+
<optional>
|
163
|
+
<attribute name="keep-with-next">
|
164
|
+
<data type="boolean"/>
|
165
|
+
</attribute>
|
166
|
+
</optional>
|
167
|
+
<optional>
|
168
|
+
<attribute name="keep-lines-together">
|
169
|
+
<data type="boolean"/>
|
170
|
+
</attribute>
|
171
|
+
</optional>
|
80
172
|
<optional>
|
81
173
|
<ref name="tname"/>
|
82
174
|
</optional>
|
@@ -97,6 +189,296 @@
|
|
97
189
|
</zeroOrMore>
|
98
190
|
</element>
|
99
191
|
</define>
|
192
|
+
<define name="table">
|
193
|
+
<element name="table">
|
194
|
+
<attribute name="id">
|
195
|
+
<data type="ID"/>
|
196
|
+
</attribute>
|
197
|
+
<optional>
|
198
|
+
<attribute name="unnumbered">
|
199
|
+
<data type="boolean"/>
|
200
|
+
</attribute>
|
201
|
+
</optional>
|
202
|
+
<optional>
|
203
|
+
<attribute name="number"/>
|
204
|
+
</optional>
|
205
|
+
<optional>
|
206
|
+
<attribute name="subsequence"/>
|
207
|
+
</optional>
|
208
|
+
<optional>
|
209
|
+
<attribute name="alt"/>
|
210
|
+
</optional>
|
211
|
+
<optional>
|
212
|
+
<attribute name="summary"/>
|
213
|
+
</optional>
|
214
|
+
<optional>
|
215
|
+
<attribute name="uri">
|
216
|
+
<data type="anyURI"/>
|
217
|
+
</attribute>
|
218
|
+
</optional>
|
219
|
+
<optional>
|
220
|
+
<attribute name="keep-with-next">
|
221
|
+
<data type="boolean"/>
|
222
|
+
</attribute>
|
223
|
+
</optional>
|
224
|
+
<optional>
|
225
|
+
<attribute name="keep-lines-together">
|
226
|
+
<data type="boolean"/>
|
227
|
+
</attribute>
|
228
|
+
</optional>
|
229
|
+
<optional>
|
230
|
+
<ref name="tname"/>
|
231
|
+
</optional>
|
232
|
+
<optional>
|
233
|
+
<ref name="thead"/>
|
234
|
+
</optional>
|
235
|
+
<ref name="tbody"/>
|
236
|
+
<optional>
|
237
|
+
<ref name="tfoot"/>
|
238
|
+
</optional>
|
239
|
+
<zeroOrMore>
|
240
|
+
<ref name="table-note"/>
|
241
|
+
</zeroOrMore>
|
242
|
+
<optional>
|
243
|
+
<ref name="dl"/>
|
244
|
+
</optional>
|
245
|
+
</element>
|
246
|
+
</define>
|
247
|
+
<define name="figure">
|
248
|
+
<element name="figure">
|
249
|
+
<attribute name="id">
|
250
|
+
<data type="ID"/>
|
251
|
+
</attribute>
|
252
|
+
<optional>
|
253
|
+
<attribute name="unnumbered">
|
254
|
+
<data type="boolean"/>
|
255
|
+
</attribute>
|
256
|
+
</optional>
|
257
|
+
<optional>
|
258
|
+
<attribute name="number"/>
|
259
|
+
</optional>
|
260
|
+
<optional>
|
261
|
+
<attribute name="subsequence"/>
|
262
|
+
</optional>
|
263
|
+
<optional>
|
264
|
+
<attribute name="keep-with-next">
|
265
|
+
<data type="boolean"/>
|
266
|
+
</attribute>
|
267
|
+
</optional>
|
268
|
+
<optional>
|
269
|
+
<attribute name="keep-lines-together">
|
270
|
+
<data type="boolean"/>
|
271
|
+
</attribute>
|
272
|
+
</optional>
|
273
|
+
<optional>
|
274
|
+
<attribute name="class"/>
|
275
|
+
</optional>
|
276
|
+
<optional>
|
277
|
+
<ref name="source"/>
|
278
|
+
</optional>
|
279
|
+
<optional>
|
280
|
+
<ref name="tname"/>
|
281
|
+
</optional>
|
282
|
+
<choice>
|
283
|
+
<ref name="image"/>
|
284
|
+
<ref name="video"/>
|
285
|
+
<ref name="audio"/>
|
286
|
+
<ref name="pre"/>
|
287
|
+
<oneOrMore>
|
288
|
+
<ref name="paragraph-with-footnote"/>
|
289
|
+
</oneOrMore>
|
290
|
+
<zeroOrMore>
|
291
|
+
<ref name="figure"/>
|
292
|
+
</zeroOrMore>
|
293
|
+
</choice>
|
294
|
+
<zeroOrMore>
|
295
|
+
<ref name="fn"/>
|
296
|
+
</zeroOrMore>
|
297
|
+
<optional>
|
298
|
+
<ref name="dl"/>
|
299
|
+
</optional>
|
300
|
+
<zeroOrMore>
|
301
|
+
<ref name="note"/>
|
302
|
+
</zeroOrMore>
|
303
|
+
</element>
|
304
|
+
</define>
|
305
|
+
<define name="sourcecode">
|
306
|
+
<element name="sourcecode">
|
307
|
+
<attribute name="id">
|
308
|
+
<data type="ID"/>
|
309
|
+
</attribute>
|
310
|
+
<optional>
|
311
|
+
<attribute name="unnumbered">
|
312
|
+
<data type="boolean"/>
|
313
|
+
</attribute>
|
314
|
+
</optional>
|
315
|
+
<optional>
|
316
|
+
<attribute name="number"/>
|
317
|
+
</optional>
|
318
|
+
<optional>
|
319
|
+
<attribute name="subsequence"/>
|
320
|
+
</optional>
|
321
|
+
<optional>
|
322
|
+
<attribute name="keep-with-next">
|
323
|
+
<data type="boolean"/>
|
324
|
+
</attribute>
|
325
|
+
</optional>
|
326
|
+
<optional>
|
327
|
+
<attribute name="keep-lines-together">
|
328
|
+
<data type="boolean"/>
|
329
|
+
</attribute>
|
330
|
+
</optional>
|
331
|
+
<optional>
|
332
|
+
<attribute name="lang"/>
|
333
|
+
</optional>
|
334
|
+
<optional>
|
335
|
+
<ref name="tname"/>
|
336
|
+
</optional>
|
337
|
+
<oneOrMore>
|
338
|
+
<choice>
|
339
|
+
<text/>
|
340
|
+
<ref name="callout"/>
|
341
|
+
</choice>
|
342
|
+
</oneOrMore>
|
343
|
+
<zeroOrMore>
|
344
|
+
<ref name="annotation"/>
|
345
|
+
</zeroOrMore>
|
346
|
+
<zeroOrMore>
|
347
|
+
<ref name="note"/>
|
348
|
+
</zeroOrMore>
|
349
|
+
</element>
|
350
|
+
</define>
|
351
|
+
<define name="formula">
|
352
|
+
<element name="formula">
|
353
|
+
<attribute name="id">
|
354
|
+
<data type="ID"/>
|
355
|
+
</attribute>
|
356
|
+
<optional>
|
357
|
+
<attribute name="unnumbered">
|
358
|
+
<data type="boolean"/>
|
359
|
+
</attribute>
|
360
|
+
</optional>
|
361
|
+
<optional>
|
362
|
+
<attribute name="number"/>
|
363
|
+
</optional>
|
364
|
+
<optional>
|
365
|
+
<attribute name="subsequence"/>
|
366
|
+
</optional>
|
367
|
+
<optional>
|
368
|
+
<attribute name="keep-with-next">
|
369
|
+
<data type="boolean"/>
|
370
|
+
</attribute>
|
371
|
+
</optional>
|
372
|
+
<optional>
|
373
|
+
<attribute name="keep-lines-together">
|
374
|
+
<data type="boolean"/>
|
375
|
+
</attribute>
|
376
|
+
</optional>
|
377
|
+
<optional>
|
378
|
+
<attribute name="inequality">
|
379
|
+
<data type="boolean"/>
|
380
|
+
</attribute>
|
381
|
+
</optional>
|
382
|
+
<ref name="stem"/>
|
383
|
+
<optional>
|
384
|
+
<ref name="dl"/>
|
385
|
+
</optional>
|
386
|
+
<zeroOrMore>
|
387
|
+
<ref name="note"/>
|
388
|
+
</zeroOrMore>
|
389
|
+
</element>
|
390
|
+
</define>
|
391
|
+
<define name="ParagraphType">
|
392
|
+
<attribute name="id">
|
393
|
+
<data type="ID"/>
|
394
|
+
</attribute>
|
395
|
+
<optional>
|
396
|
+
<attribute name="align">
|
397
|
+
<ref name="Alignments"/>
|
398
|
+
</attribute>
|
399
|
+
</optional>
|
400
|
+
<optional>
|
401
|
+
<attribute name="keep-with-next">
|
402
|
+
<data type="boolean"/>
|
403
|
+
</attribute>
|
404
|
+
</optional>
|
405
|
+
<optional>
|
406
|
+
<attribute name="keep-lines-together">
|
407
|
+
<data type="boolean"/>
|
408
|
+
</attribute>
|
409
|
+
</optional>
|
410
|
+
<zeroOrMore>
|
411
|
+
<ref name="TextElement"/>
|
412
|
+
</zeroOrMore>
|
413
|
+
<zeroOrMore>
|
414
|
+
<ref name="note"/>
|
415
|
+
</zeroOrMore>
|
416
|
+
</define>
|
417
|
+
<define name="paragraph-with-footnote">
|
418
|
+
<element name="p">
|
419
|
+
<attribute name="id">
|
420
|
+
<data type="ID"/>
|
421
|
+
</attribute>
|
422
|
+
<optional>
|
423
|
+
<attribute name="align">
|
424
|
+
<ref name="Alignments"/>
|
425
|
+
</attribute>
|
426
|
+
</optional>
|
427
|
+
<optional>
|
428
|
+
<attribute name="keep-with-next">
|
429
|
+
<data type="boolean"/>
|
430
|
+
</attribute>
|
431
|
+
</optional>
|
432
|
+
<optional>
|
433
|
+
<attribute name="keep-lines-together">
|
434
|
+
<data type="boolean"/>
|
435
|
+
</attribute>
|
436
|
+
</optional>
|
437
|
+
<zeroOrMore>
|
438
|
+
<choice>
|
439
|
+
<ref name="TextElement"/>
|
440
|
+
<ref name="fn"/>
|
441
|
+
</choice>
|
442
|
+
</zeroOrMore>
|
443
|
+
<zeroOrMore>
|
444
|
+
<ref name="note"/>
|
445
|
+
</zeroOrMore>
|
446
|
+
</element>
|
447
|
+
</define>
|
448
|
+
<define name="quote">
|
449
|
+
<element name="quote">
|
450
|
+
<attribute name="id">
|
451
|
+
<data type="ID"/>
|
452
|
+
</attribute>
|
453
|
+
<optional>
|
454
|
+
<attribute name="alignment">
|
455
|
+
<ref name="Alignments"/>
|
456
|
+
</attribute>
|
457
|
+
</optional>
|
458
|
+
<optional>
|
459
|
+
<attribute name="keep-with-next">
|
460
|
+
<data type="boolean"/>
|
461
|
+
</attribute>
|
462
|
+
</optional>
|
463
|
+
<optional>
|
464
|
+
<attribute name="keep-lines-together">
|
465
|
+
<data type="boolean"/>
|
466
|
+
</attribute>
|
467
|
+
</optional>
|
468
|
+
<optional>
|
469
|
+
<ref name="quote-source"/>
|
470
|
+
</optional>
|
471
|
+
<optional>
|
472
|
+
<ref name="quote-author"/>
|
473
|
+
</optional>
|
474
|
+
<oneOrMore>
|
475
|
+
<ref name="paragraph-with-footnote"/>
|
476
|
+
</oneOrMore>
|
477
|
+
<zeroOrMore>
|
478
|
+
<ref name="note"/>
|
479
|
+
</zeroOrMore>
|
480
|
+
</element>
|
481
|
+
</define>
|
100
482
|
<define name="BibDataExtensionType">
|
101
483
|
<ref name="doctype"/>
|
102
484
|
<optional>
|
@@ -165,6 +547,30 @@
|
|
165
547
|
<attribute name="id">
|
166
548
|
<data type="ID"/>
|
167
549
|
</attribute>
|
550
|
+
<optional>
|
551
|
+
<attribute name="unnumbered">
|
552
|
+
<data type="boolean"/>
|
553
|
+
</attribute>
|
554
|
+
</optional>
|
555
|
+
<optional>
|
556
|
+
<attribute name="number"/>
|
557
|
+
</optional>
|
558
|
+
<optional>
|
559
|
+
<attribute name="subsequence"/>
|
560
|
+
</optional>
|
561
|
+
<optional>
|
562
|
+
<attribute name="keep-with-next">
|
563
|
+
<data type="boolean"/>
|
564
|
+
</attribute>
|
565
|
+
</optional>
|
566
|
+
<optional>
|
567
|
+
<attribute name="keep-lines-together">
|
568
|
+
<data type="boolean"/>
|
569
|
+
</attribute>
|
570
|
+
</optional>
|
571
|
+
<optional>
|
572
|
+
<attribute name="type"/>
|
573
|
+
</optional>
|
168
574
|
<oneOrMore>
|
169
575
|
<choice>
|
170
576
|
<ref name="paragraph"/>
|
@@ -910,6 +1316,27 @@
|
|
910
1316
|
<attribute name="id">
|
911
1317
|
<data type="ID"/>
|
912
1318
|
</attribute>
|
1319
|
+
<optional>
|
1320
|
+
<attribute name="unnumbered">
|
1321
|
+
<data type="boolean"/>
|
1322
|
+
</attribute>
|
1323
|
+
</optional>
|
1324
|
+
<optional>
|
1325
|
+
<attribute name="number"/>
|
1326
|
+
</optional>
|
1327
|
+
<optional>
|
1328
|
+
<attribute name="subsequence"/>
|
1329
|
+
</optional>
|
1330
|
+
<optional>
|
1331
|
+
<attribute name="keep-with-next">
|
1332
|
+
<data type="boolean"/>
|
1333
|
+
</attribute>
|
1334
|
+
</optional>
|
1335
|
+
<optional>
|
1336
|
+
<attribute name="keep-lines-together">
|
1337
|
+
<data type="boolean"/>
|
1338
|
+
</attribute>
|
1339
|
+
</optional>
|
913
1340
|
<oneOrMore>
|
914
1341
|
<choice>
|
915
1342
|
<ref name="paragraph"/>
|
data/grammars/isostandard.rng
CHANGED
data/lib/relaton_gb.rb
CHANGED
@@ -35,6 +35,7 @@ module RelatonGb
|
|
35
35
|
@ccs = args[:ccs].map { |c| c.is_a?(Cnccs::Ccs) ? c : Cnccs.fetch(c) }
|
36
36
|
@gbtype = GbStandardType.new args[:gbtype]
|
37
37
|
@gbplannumber = args[:gbplannumber] || structuredidentifier&.project_number
|
38
|
+
# @doctype = args[:doctype]
|
38
39
|
end
|
39
40
|
|
40
41
|
# @param builder [Nokogiri::XML::Builder]
|
@@ -61,9 +62,7 @@ module RelatonGb
|
|
61
62
|
|
62
63
|
# @return [String]
|
63
64
|
def inspect
|
64
|
-
"<#{self.class}:#{format('
|
65
|
-
# "@fullIdentifier=\"#{@fetch&.shortref}\" "\
|
66
|
-
# "@title=\"#{title}\">"
|
65
|
+
"<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)}>"
|
67
66
|
end
|
68
67
|
|
69
68
|
# @return [String]
|
@@ -76,34 +75,11 @@ module RelatonGb
|
|
76
75
|
|
77
76
|
id ||= @docidentifier.reject { |i| i.type == "DOI" }[0]
|
78
77
|
idstr = id.id
|
79
|
-
# if id.part_number&.size&.positive?
|
80
|
-
# idstr = idstr + "-#{id.part_number}"
|
81
|
-
# end
|
82
78
|
idstr.gsub(/\s/, "").strip
|
83
79
|
end
|
84
80
|
|
85
81
|
private
|
86
82
|
|
87
|
-
# Overraides IsoBibliographicItem method.
|
88
|
-
# @param language [Array<String>]
|
89
|
-
# @raise ArgumentError
|
90
|
-
def check_language(language)
|
91
|
-
language.each do |lang|
|
92
|
-
unless %w[en zh].include? lang
|
93
|
-
raise ArgumentError, "invalid language: #{lang}"
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
# Overraides IsoBibliographicItem method.
|
99
|
-
# @param script [Array<String>]
|
100
|
-
# @raise ArgumentError
|
101
|
-
def check_script(script)
|
102
|
-
script.each do |scr|
|
103
|
-
raise ArgumentError, "invalid script: #{scr}" unless %w[Latn Hans].include? scr
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
83
|
# @param builder [Nokogiri::XML::Builder]
|
108
84
|
def render_gbxml(builder)
|
109
85
|
gbtype.to_xml builder
|
@@ -16,7 +16,7 @@ module RelatonGb
|
|
16
16
|
# @return [RelatonGb::HitCollection]
|
17
17
|
def scrape_page(text)
|
18
18
|
search_html = OpenURI.open_uri(
|
19
|
-
"http://openstd.samr.gov.cn/bzgk/gb/std_list?p.p2=" + text
|
19
|
+
"http://openstd.samr.gov.cn/bzgk/gb/std_list?p.p2=" + text,
|
20
20
|
)
|
21
21
|
result = Nokogiri::HTML search_html
|
22
22
|
hits = result.xpath(
|
@@ -17,6 +17,15 @@ module RelatonGb
|
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
+
#
|
21
|
+
# Overrides superclass's method
|
22
|
+
#
|
23
|
+
# @param item [Hash]
|
24
|
+
# @return [RelatonGb::GbBibliographicItem]
|
25
|
+
def bib_item(item)
|
26
|
+
GbBibliographicItem.new(item)
|
27
|
+
end
|
28
|
+
|
20
29
|
def ccs_hash_to_bib(ret)
|
21
30
|
ret[:ccs] = array(ret[:ccs]).map do |ccs|
|
22
31
|
ccs[:code] ? Cnccs.fetch(ccs[:code]) : Cnccs.fetch(ccs)
|
data/lib/relaton_gb/hit.rb
CHANGED
@@ -38,7 +38,7 @@ module RelatonGb
|
|
38
38
|
|
39
39
|
# @return [String]
|
40
40
|
def inspect
|
41
|
-
"<#{self.class}:#{format('
|
41
|
+
"<#{self.class}:#{format('%<id>#.14x', id: object_id << 1)} "\
|
42
42
|
"@fullIdentifier=\"#{@fetch&.shortref}\" "\
|
43
43
|
"@docref=\"#{docref}\">"
|
44
44
|
end
|
@@ -5,10 +5,11 @@ module RelatonGb
|
|
5
5
|
class HitCollection < RelatonBib::HitCollection
|
6
6
|
# @param hits [Array<Hash>]
|
7
7
|
# @param hit_pages [Integer]
|
8
|
-
# @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper,
|
8
|
+
# @param scrapper [RelatonGb::GbScrapper, RelatonGb::SecScrapper,
|
9
|
+
# RelatonGb::TScrapper]
|
9
10
|
def initialize(hits = [])
|
10
11
|
@array = hits
|
11
|
-
@fetched
|
12
|
+
@fetched = false
|
12
13
|
end
|
13
14
|
end
|
14
15
|
end
|
data/lib/relaton_gb/scrapper.rb
CHANGED
@@ -78,19 +78,14 @@ module RelatonGb
|
|
78
78
|
end
|
79
79
|
|
80
80
|
# @param doc [Nokogiri::HTML::Document]
|
81
|
-
# @return [Array<
|
82
|
-
# * :title_intro [String]
|
83
|
-
# * :title_main [String]
|
84
|
-
# * :language [String]
|
85
|
-
# * :script [String]
|
81
|
+
# @return [Array<RelatonBib::TypedTitleString>]
|
86
82
|
def get_titles(doc)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
titles
|
83
|
+
tzh = doc.at("//td[contains(text(), '中文标准名称')]/b").text
|
84
|
+
titles = RelatonBib::TypedTitleString.from_string tzh, "zh", "Hans"
|
85
|
+
ten = doc.at("//td[contains(text(), '英文标准名称')]").text.match(/[\w\s]+/).to_s
|
86
|
+
return titles if ten.empty?
|
87
|
+
|
88
|
+
titles + RelatonBib::TypedTitleString.from_string(ten, "en", "Latn")
|
94
89
|
end
|
95
90
|
|
96
91
|
def get_type
|
@@ -123,12 +118,6 @@ module RelatonGb
|
|
123
118
|
mandate: get_mandate(ref), topic: "other" }
|
124
119
|
end
|
125
120
|
|
126
|
-
# @param doc [Nokogiri::HTML::Document]
|
127
|
-
# @return [String]
|
128
|
-
# def get_ref(doc)
|
129
|
-
# doc.xpath('//dt[text()="标准号"]/following-sibling::dd[1]').text
|
130
|
-
# end
|
131
|
-
|
132
121
|
# @param doc [Nokogiri::HTML::Document]
|
133
122
|
# @return [Array<String>]
|
134
123
|
def get_ccs(doc)
|
@@ -58,13 +58,15 @@ module RelatonGb
|
|
58
58
|
# * :language [String]
|
59
59
|
# * :script [String]
|
60
60
|
def get_titles(doc)
|
61
|
-
titles = [{ title_main: doc.at("//h4").text.delete("\r\n\t"),
|
62
|
-
|
61
|
+
# titles = [{ title_main: doc.at("//h4").text.delete("\r\n\t"),
|
62
|
+
# title_intro: nil, language: "zh", script: "Hans" }]
|
63
|
+
tzh = doc.at("//h4").text.delete("\r\n\t")
|
64
|
+
RelatonBib::TypedTitleString.from_string tzh, "zh", "Hans"
|
63
65
|
# title_main = doc.at("//td[contains(text(), '英文标准名称')]").text.match(/[\w\s]+/).to_s
|
64
66
|
# unless title_main.empty?
|
65
67
|
# titles << { title_main: title_main, title_intro: nil, language: "en", script: "Latn" }
|
66
68
|
# end
|
67
|
-
titles
|
69
|
+
# titles
|
68
70
|
end
|
69
71
|
|
70
72
|
# @param _doc [Nokogiri::HTML::Document]
|
@@ -84,16 +84,13 @@ module RelatonGb
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def get_titles(doc)
|
87
|
-
|
88
|
-
titles =
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
script: "Latn" }
|
95
|
-
end
|
96
|
-
titles
|
87
|
+
xpz = '//td[contains(.,"中文标题")]/following-sibling::td[1]'
|
88
|
+
titles = RelatonBib::TypedTitleString.from_string doc.at(xpz).text, "zh", "Hans"
|
89
|
+
xpe = '//td[contains(.,"英文标题")]/following-sibling::td[1]'
|
90
|
+
ten = doc.xpath(xpe).text
|
91
|
+
return titles if ten.empty?
|
92
|
+
|
93
|
+
titles + RelatonBib::TypedTitleString.from_string(ten, "en", "Latn")
|
97
94
|
end
|
98
95
|
|
99
96
|
def gbtype
|
data/lib/relaton_gb/version.rb
CHANGED
@@ -3,18 +3,15 @@ require "nokogiri"
|
|
3
3
|
module RelatonGb
|
4
4
|
class XMLParser < RelatonIsoBib::XMLParser
|
5
5
|
class << self
|
6
|
-
def from_xml(xml)
|
7
|
-
doc = Nokogiri::XML(xml)
|
8
|
-
gbitem = doc.at "/bibitem|/bibdata"
|
9
|
-
if gbitem
|
10
|
-
GbBibliographicItem.new item_data(gbitem)
|
11
|
-
else
|
12
|
-
warn "[relato-gb] can't find bibitem or bibdata element in the XML"
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
6
|
private
|
17
7
|
|
8
|
+
# override RelatonBib::BibliographicItem.bib_item method
|
9
|
+
# @param item_hash [Hash]
|
10
|
+
# @return [RelatonGb::GbBibliographicItem]
|
11
|
+
def bib_item(item_hash)
|
12
|
+
GbBibliographicItem.new item_hash
|
13
|
+
end
|
14
|
+
|
18
15
|
def item_data(gbitem)
|
19
16
|
data = super
|
20
17
|
data[:committee] = fetch_committee gbitem
|
@@ -24,11 +21,6 @@ module RelatonGb
|
|
24
21
|
data
|
25
22
|
end
|
26
23
|
|
27
|
-
# Overrade get_id from RelatonIsoBib::XMLParser
|
28
|
-
# def get_id(did)
|
29
|
-
# did.text.match(/^(?<project>.*?\d+)(?<hyphen>-)?(?(<hyphen>)(?<year>\d*))/)
|
30
|
-
# end
|
31
|
-
|
32
24
|
def fetch_committee(doc)
|
33
25
|
committee = doc.at "./ext/gbcommittee"
|
34
26
|
return nil unless committee
|
data/relaton_gb.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: relaton-gb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-06-
|
11
|
+
date: 2020-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: debase
|
@@ -184,14 +184,14 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - ">="
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: 1.
|
187
|
+
version: 1.2.0
|
188
188
|
type: :runtime
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - ">="
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: 1.
|
194
|
+
version: 1.2.0
|
195
195
|
description: 'RelatonGb: retrieve Chinese GB Standards for bibliographic use using
|
196
196
|
the BibliographicItem model.'
|
197
197
|
email:
|