gbbib 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -3
- data/gbbib.gemspec +1 -0
- data/lib/gbbib/gb_bibliographic_item.rb +9 -0
- data/lib/gbbib/scrapper.rb +14 -4
- data/lib/gbbib/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b3e3ee80ed7fef30b07a956b8ec13313615c0e164ac523824fff69e63850539f
|
4
|
+
data.tar.gz: 9a753e31b51f631f710f96d3f38ba6bff254980f4faa5ed51571288acf5dc40d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5ed82a9de4bb8cf57de8d5271f0e3de47f64496509581600dc2f398e64042adc621f2a3ac3ff5321f11bddfea5ac5b5a11c9dca35ae642e702de8fc7624e5b4
|
7
|
+
data.tar.gz: 21954b24468a3ea22ad0db05b606be95b37a19e1dae13d35d9ce2a766ba212366978f35d3991bb24b6d403bb8cc26c9d7c7bdbc0477c58c1d760839b5e01d03d
|
data/Gemfile.lock
CHANGED
@@ -1,21 +1,23 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
gbbib (0.2.1)
|
4
|
+
gbbib (0.2.2)
|
5
5
|
cnccs (~> 0.1.1)
|
6
|
+
gb-agencies (~> 0.0.1)
|
6
7
|
iso-bib-item (~> 0.2)
|
7
8
|
|
8
9
|
GEM
|
9
10
|
remote: https://rubygems.org/
|
10
11
|
specs:
|
11
12
|
byebug (10.0.2)
|
12
|
-
cnccs (0.1.1)
|
13
|
+
cnccs (0.1.2)
|
13
14
|
coderay (1.1.2)
|
14
15
|
diff-lcs (1.3)
|
15
16
|
docile (1.3.1)
|
16
17
|
equivalent-xml (0.6.0)
|
17
18
|
nokogiri (>= 1.4.3)
|
18
|
-
|
19
|
+
gb-agencies (0.0.1)
|
20
|
+
iso-bib-item (0.2.3)
|
19
21
|
isoics (~> 0.1.6)
|
20
22
|
nokogiri (~> 1.8.4)
|
21
23
|
ruby_deep_clone (~> 0.8.0)
|
data/gbbib.gemspec
CHANGED
data/lib/gbbib/gb_bibliographic_item.rb
CHANGED
@@ -58,6 +58,15 @@ module Gbbib
|
|
58
58
|
inspect
|
59
59
|
end
|
60
60
|
|
61
|
+
def id(attribute, delim = '')
|
62
|
+
return nil if attribute && !@id_attribute
|
63
|
+
idstr = "#{@docidentifier.prefix}#{delim}#{@docidentifier.project_number}"
|
64
|
+
if @docidentifier.part_number&.size&.positive?
|
65
|
+
idstr = idstr + "-#{@docidentifier.part_number}"
|
66
|
+
end
|
67
|
+
idstr.strip
|
68
|
+
end
|
69
|
+
|
61
70
|
private
|
62
71
|
|
63
72
|
# @param builder [Nokogiri::XML::Builder]
|
data/lib/gbbib/scrapper.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
require
|
4
|
+
require "yaml"
|
5
|
+
require "gb_agencies"
|
5
6
|
|
6
7
|
module Gbbib
|
7
8
|
# Common scrapping methods.
|
@@ -37,12 +38,20 @@ module Gbbib
|
|
37
38
|
# * :part_number [String]
|
38
39
|
def get_docid(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
|
39
40
|
item_ref = doc.xpath(xpt).text.match(/(?<=\s)(\d+)\.?((?<=\.)\d+|)/)
|
40
|
-
|
41
|
+
prefix = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
|
42
|
+
{ project_number: item_ref[1], part_number: item_ref[2], prefix: prefix }
|
41
43
|
end
|
42
44
|
|
43
45
|
def get_contributors(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
|
46
|
+
gb_en = GbAgencies::Agencies.new("en", {}, "")
|
47
|
+
gb_zh = GbAgencies::Agencies.new("zh", {}, "")
|
44
48
|
name = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
|
45
|
-
|
49
|
+
name.sub!(%r{/[TZ]$}, "") unless name.match(/^GB/)
|
50
|
+
gbtype = get_gbtype(doc)
|
51
|
+
entity = IsoBibItem::Organization.new name: [
|
52
|
+
{language: "en", content: gb_en.standard_agency1(gbtype[:scope], name, gbtype[:mandate]) },
|
53
|
+
{language: "zh", content: gb_zh.standard_agency1(gbtype[:scope], name, gbtype[:mandate]) },
|
54
|
+
]
|
46
55
|
[{ entity: entity, roles: ['publisher'] }]
|
47
56
|
end
|
48
57
|
|
@@ -103,7 +112,7 @@ module Gbbib
|
|
103
112
|
# @param doc [Nokogiri::HTML::Document]
|
104
113
|
# @return [Array<String>]
|
105
114
|
def get_ccs(doc)
|
106
|
-
[doc.xpath('//dt[text()="中国标准分类号"]/following-sibling::dd[1]').text]
|
115
|
+
[doc&.xpath('//dt[text()="中国标准分类号"]/following-sibling::dd[1]')&.text]
|
107
116
|
end
|
108
117
|
|
109
118
|
# @param doc [Nokogiri::HTML::Document]
|
@@ -113,6 +122,7 @@ module Gbbib
|
|
113
122
|
# * :subgroup [String]
|
114
123
|
def get_ics(doc)
|
115
124
|
ics = doc.xpath('//dt[(.="国际标准分类号")]/following-sibling::dd[1]/span')
|
125
|
+
ics.empty? and return []
|
116
126
|
field, group, subgroup = ics.text.split '.'
|
117
127
|
[{ field: field, group: group.ljust(3, '0'), subgroup: subgroup }]
|
118
128
|
end
|
data/lib/gbbib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gbbib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,6 +122,20 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0.2'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: gb-agencies
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.0.1
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.0.1
|
125
139
|
description: 'GdBib: retrieve Chinese GB Standards for bibliographic use using the
|
126
140
|
BibliographicItem model.'
|
127
141
|
email:
|