gbbib 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -3
- data/gbbib.gemspec +1 -0
- data/lib/gbbib/gb_bibliographic_item.rb +9 -0
- data/lib/gbbib/scrapper.rb +14 -4
- data/lib/gbbib/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3e3ee80ed7fef30b07a956b8ec13313615c0e164ac523824fff69e63850539f
|
4
|
+
data.tar.gz: 9a753e31b51f631f710f96d3f38ba6bff254980f4faa5ed51571288acf5dc40d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5ed82a9de4bb8cf57de8d5271f0e3de47f64496509581600dc2f398e64042adc621f2a3ac3ff5321f11bddfea5ac5b5a11c9dca35ae642e702de8fc7624e5b4
|
7
|
+
data.tar.gz: 21954b24468a3ea22ad0db05b606be95b37a19e1dae13d35d9ce2a766ba212366978f35d3991bb24b6d403bb8cc26c9d7c7bdbc0477c58c1d760839b5e01d03d
|
data/Gemfile.lock
CHANGED
@@ -1,21 +1,23 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
gbbib (0.2.1)
|
4
|
+
gbbib (0.2.2)
|
5
5
|
cnccs (~> 0.1.1)
|
6
|
+
gb-agencies (~> 0.0.1)
|
6
7
|
iso-bib-item (~> 0.2)
|
7
8
|
|
8
9
|
GEM
|
9
10
|
remote: https://rubygems.org/
|
10
11
|
specs:
|
11
12
|
byebug (10.0.2)
|
12
|
-
cnccs (0.1.
|
13
|
+
cnccs (0.1.2)
|
13
14
|
coderay (1.1.2)
|
14
15
|
diff-lcs (1.3)
|
15
16
|
docile (1.3.1)
|
16
17
|
equivalent-xml (0.6.0)
|
17
18
|
nokogiri (>= 1.4.3)
|
18
|
-
|
19
|
+
gb-agencies (0.0.1)
|
20
|
+
iso-bib-item (0.2.3)
|
19
21
|
isoics (~> 0.1.6)
|
20
22
|
nokogiri (~> 1.8.4)
|
21
23
|
ruby_deep_clone (~> 0.8.0)
|
data/gbbib.gemspec
CHANGED
data/lib/gbbib/gb_bibliographic_item.rb
CHANGED
@@ -58,6 +58,15 @@ module Gbbib
|
|
58
58
|
inspect
|
59
59
|
end
|
60
60
|
|
61
|
+
def id(attribute, delim = '')
|
62
|
+
return nil if attribute && !@id_attribute
|
63
|
+
idstr = "#{@docidentifier.prefix}#{delim}#{@docidentifier.project_number}"
|
64
|
+
if @docidentifier.part_number&.size&.positive?
|
65
|
+
idstr = idstr + "-#{@docidentifier.part_number}"
|
66
|
+
end
|
67
|
+
idstr.strip
|
68
|
+
end
|
69
|
+
|
61
70
|
private
|
62
71
|
|
63
72
|
# @param builder [Nokogiri::XML::Builder]
|
data/lib/gbbib/scrapper.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
require
|
4
|
+
require "yaml"
|
5
|
+
require "gb_agencies"
|
5
6
|
|
6
7
|
module Gbbib
|
7
8
|
# Common scrapping methods.
|
@@ -37,12 +38,20 @@ module Gbbib
|
|
37
38
|
# * :part_number [String]
|
38
39
|
def get_docid(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
|
39
40
|
item_ref = doc.xpath(xpt).text.match(/(?<=\s)(\d+)\.?((?<=\.)\d+|)/)
|
40
|
-
|
41
|
+
prefix = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
|
42
|
+
{ project_number: item_ref[1], part_number: item_ref[2], prefix: prefix }
|
41
43
|
end
|
42
44
|
|
43
45
|
def get_contributors(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
|
46
|
+
gb_en = GbAgencies::Agencies.new("en", {}, "")
|
47
|
+
gb_zh = GbAgencies::Agencies.new("zh", {}, "")
|
44
48
|
name = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
|
45
|
-
|
49
|
+
name.sub!(%r{/[TZ]$}, "") unless name.match(/^GB/)
|
50
|
+
gbtype = get_gbtype(doc)
|
51
|
+
entity = IsoBibItem::Organization.new name: [
|
52
|
+
{language: "en", content: gb_en.standard_agency1(gbtype[:scope], name, gbtype[:mandate]) },
|
53
|
+
{language: "zh", content: gb_zh.standard_agency1(gbtype[:scope], name, gbtype[:mandate]) },
|
54
|
+
]
|
46
55
|
[{ entity: entity, roles: ['publisher'] }]
|
47
56
|
end
|
48
57
|
|
@@ -103,7 +112,7 @@ module Gbbib
|
|
103
112
|
# @param doc [Nokogiri::HTML::Document]
|
104
113
|
# @return [Array<String>]
|
105
114
|
def get_ccs(doc)
|
106
|
-
[doc
|
115
|
+
[doc&.xpath('//dt[text()="中国标准分类号"]/following-sibling::dd[1]')&.text]
|
107
116
|
end
|
108
117
|
|
109
118
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -113,6 +122,7 @@ module Gbbib
|
|
113
122
|
# * :subgroup [String]
|
114
123
|
def get_ics(doc)
|
115
124
|
ics = doc.xpath('//dt[(.="国际标准分类号")]/following-sibling::dd[1]/span')
|
125
|
+
ics.empty? and return []
|
116
126
|
field, group, subgroup = ics.text.split '.'
|
117
127
|
[{ field: field, group: group.ljust(3, '0'), subgroup: subgroup }]
|
118
128
|
end
|
data/lib/gbbib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gbbib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0.2'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: gb-agencies
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.0.1
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.0.1
|
125
139
|
description: 'GdBib: retrieve Chinese GB Standards for bibliographic use using the
|
126
140
|
BibliographicItem model.'
|
127
141
|
email:
|