gbbib 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 2967105032707b9f8b1232f232874383d178cac8e37e81564c3f4703706bb042
4
- data.tar.gz: 4fe03b886a4af769c90de2100d05d4dbf539b47577f5992b9ad6559f9c2632cb
2
+ SHA1:
3
+ metadata.gz: eb3b926e47fdfca5623a37e092d8072be696a8fe
4
+ data.tar.gz: 781ce62f34f01ea01ce1b1bea083ec851beb56e8
5
5
  SHA512:
6
- metadata.gz: 987c2fdf13dfebb7153d14faa2db1a3aabab8905f2d60978b5d23587a774613884e3509b8047590993c43342b8c7a88c98189b4974c4edc3f3ba06b3da84dec3
7
- data.tar.gz: 25be12092031186450f44218b40dfb50d660b7aeadd24da1130b4f190b38c504572156a6550b0f20ce5eb443e6eab428221227ffe7cd61d61398ed2097faa0fd
6
+ metadata.gz: c19520eb951344e11564dfba80bc16111ef331dc5b46e8bb37a31b1d3892ad56165ed90495eb82a1158e1034f6e05800dc7d40feced12c74f2e067a50e7ce173
7
+ data.tar.gz: 4b75cfa4a98a855a83800d7c84c207997f008a5d5ab80a655fa5d2098be19ae248dd255afce983b069a7d27d997aa1ac411cd26d26f5c7c31a327f11069f5776
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- gbbib (0.1.2)
4
+ gbbib (0.1.4)
5
5
  cnccs
6
6
  iso-bib-item
7
7
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'gbbib/workers_pool'
4
+
3
5
  # GB bib module.
4
6
  module Gbbib
5
7
  # GB entry point class.
@@ -7,7 +9,7 @@ module Gbbib
7
9
  class << self
8
10
  # rubocop:disable Metrics/MethodLength
9
11
  # @param text [String] code of standard for search
10
- # @return [Gbbib::Hits]
12
+ # @return [Gbbib::HitCollection]
11
13
  def search(text)
12
14
  if text.match?(/^(GB|GJ|GS)/)
13
15
  # Scrape national standards.
@@ -30,6 +32,124 @@ module Gbbib
30
32
  end
31
33
  end
32
34
  # rubocop:enable Metrics/MethodLength
35
+
36
+ # @param code [String] the GB standard Code to look up (e.g. "GB/T 20223")
37
+ # @param year [String] the year the standard was published (optional)
38
+ # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required
39
+ # @return [String] Relaton XML serialisation of reference
40
+ def get(code, year, opts)
41
+ return iev.to_xml if code.casecmp? 'IEV'
42
+ code += '.1' if opts[:all_parts]
43
+ ret = get1(code, year, opts)
44
+ return nil if ret.nil?
45
+ ret.to_most_recent_reference unless year
46
+ ret.to_all_parts if opts[:all_parts]
47
+ ret.to_xml
48
+ end
49
+
50
+ private
51
+
52
+ def fetch_ref_err(code, year, missed_years)
53
+ id = year ? "#{code}:#{year}" : code
54
+ warn "WARNING: no match found on the ISO website for #{id}. "\
55
+ "The code must be exactly like it is on the website."
56
+ warn "(There was no match for #{year}, though there were matches "\
57
+ "found for #{missed_years.join(', ')}.)" unless missed_years.empty?
58
+ if /\d-\d/.match? code
59
+ warn "The provided document part may not exist, or the document "\
60
+ "may no longer be published in parts."
61
+ else
62
+ warn "If you wanted to cite all document parts for the reference, "\
63
+ "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
64
+ "use its document type abbreviation (TS, TR, PAS, Guide)."
65
+ end
66
+ nil
67
+ end
68
+
69
+ def get1(code, year, opts)
70
+ return iev if code.casecmp? "IEV"
71
+ result = search_filter(code) or return nil
72
+ ret = results_filter(result, year)
73
+ return ret[:ret] if ret[:ret]
74
+ fetch_ref_err(code, year, ret[:years])
75
+ end
76
+
77
+ def search_filter(code)
78
+ docidrx = %r{^[^\s]+\s[\d\.]+}
79
+ # corrigrx = %r{^[^\s]+\s[\d\.]+-[0-9]+/}
80
+ warn "fetching #{code}..."
81
+ result = search(code)
82
+ ret = result.select do |hit|
83
+ hit.title && hit.title.match(docidrx).to_s == code # &&
84
+ # !corrigrx.match?(hit.title)
85
+ end
86
+ return ret unless ret.empty?
87
+ []
88
+ end
89
+
90
+ # Sort through the results from Isobib, fetching them three at a time,
91
+ # and return the first result that matches the code,
92
+ # matches the year (if provided), and which has a title (amendments do not).
93
+ # Only expects the first page of results to be populated.
94
+ # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
95
+ # If no match, returns any years which caused mismatch, for error reporting
96
+ def results_filter(result, year)
97
+ missed_years = []
98
+ result.each_slice(3) do |s| # ISO website only allows 3 connections
99
+ fetch_pages(s, 3).each_with_index do |r, i|
100
+ return { ret: r } if !year
101
+ r.dates.select { |d| d.type == "published" }.each do |d|
102
+ return { ret: r } if year.to_i == d.on.year
103
+ missed_years << d.on.year
104
+ end
105
+ end
106
+ end
107
+ { years: missed_years }
108
+ end
109
+
110
+ def fetch_pages(s, n)
111
+ workers = WorkersPool.new n
112
+ workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
113
+ s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
114
+ workers.end
115
+ workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
116
+ end
117
+
118
+ def iev
119
+ Nokogiri::XML.fragment(<<~"END")
120
+ <bibitem type="international-standard" id="IEV">
121
+ <title format="text/plain" language="en" script="Latn">Electropedia: The World's Online Electrotechnical Vocabulary</title>
122
+ <source type="src">http://www.electropedia.org</source>
123
+ <docidentifier>IEV</docidentifier>
124
+ <date type="published"> <on>#{Date.today.year}</on> </date>
125
+ <contributor>
126
+ <role type="publisher"/>
127
+ <organization>
128
+ <name>International Electrotechnical Commission</name>
129
+ <abbreviation>IEC</abbreviation>
130
+ <uri>www.iec.ch</uri>
131
+ </organization>
132
+ </contributor>
133
+ <language>en</language> <language>fr</language>
134
+ <script>Latn</script>
135
+ <copyright>
136
+ <from>#{Date.today.year}</from>
137
+ <owner>
138
+ <organization>
139
+ <name>International Electrotechnical Commission</name>
140
+ <abbreviation>IEC</abbreviation>
141
+ <uri>www.iec.ch</uri>
142
+ </organization>
143
+ </owner>
144
+ </copyright>
145
+ <relation type="updates">
146
+ <bibitem>
147
+ <formattedref>IEC 60050</formattedref>
148
+ </bibitem>
149
+ </relation>
150
+ </bibitem>
151
+ END
152
+ end
33
153
  end
34
154
  end
35
155
  end
data/lib/gbbib/hit.rb CHANGED
@@ -43,6 +43,8 @@ module Gbbib
43
43
  "@title=\"#{title}\">"
44
44
  end
45
45
 
46
+ # @param builder [Nokogiri::XML::Builder]
47
+ # @param opts [Hash]
46
48
  # @return [String]
47
49
  def to_xml(builder = nil, opts = {})
48
50
  if builder
@@ -21,25 +21,25 @@ module Gbbib
21
21
  @hit_pages = hit_pages
22
22
  end
23
23
 
24
- # @return [Isobib::HitCollection]
25
- # def fetch
26
- # workers = WorkersPool.new 4
27
- # workers.worker(&:fetch)
28
- # each do |hit|
29
- # workers << hit
30
- # end
31
- # workers.end
32
- # workers.result
33
- # @fetched = true
34
- # self
35
- # end
24
+ # @return [Gbbib::HitCollection]
25
+ def fetch
26
+ workers = WorkersPool.new 4
27
+ workers.worker(&:fetch)
28
+ each do |hit|
29
+ workers << hit
30
+ end
31
+ workers.end
32
+ workers.result
33
+ @fetched = true
34
+ self
35
+ end
36
36
 
37
- # def to_s
38
- # inspect
39
- # end
40
- #
41
- # def inspect
42
- # "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
43
- # end
37
+ def to_s
38
+ inspect
39
+ end
40
+
41
+ def inspect
42
+ "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
43
+ end
44
44
  end
45
45
  end
@@ -14,18 +14,19 @@ module Gbbib
14
14
  # @return [Hash]
15
15
  def scrapped_data(doc, src:)
16
16
  {
17
- committee: get_committee(doc),
18
- docid: get_docid(doc),
19
- titles: get_titles(doc),
20
- type: get_type(doc),
21
- docstatus: get_status(doc),
22
- gbtype: get_gbtype(doc),
23
- ccs: get_ccs(doc),
24
- ics: get_ics(doc),
25
- link: [{ type: 'src', content: src }],
26
- dates: get_dates(doc),
27
- language: ['zh'],
28
- script: ['Hans']
17
+ committee: get_committee(doc),
18
+ docid: get_docid(doc),
19
+ titles: get_titles(doc),
20
+ contributors: get_contributors(doc),
21
+ type: get_type(doc),
22
+ docstatus: get_status(doc),
23
+ gbtype: get_gbtype(doc),
24
+ ccs: get_ccs(doc),
25
+ ics: get_ics(doc),
26
+ link: [{ type: 'src', content: src }],
27
+ dates: get_dates(doc),
28
+ language: ['zh'],
29
+ script: ['Hans']
29
30
  }
30
31
  end
31
32
  # rubocop:enable Metrics/MethodLength
@@ -35,11 +36,17 @@ module Gbbib
35
36
  # * :project_number [String]
36
37
  # * :part_number [String]
37
38
  def get_docid(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
38
- item_ref = doc.xpath(xpt)
39
- .text.match(/(?<=\s)(\d+)-?((?<=-)\d+|)/)
39
+ item_ref = doc.xpath(xpt).text.match(/(?<=\s)(\d+)\.?((?<=\.)\d+|)/)
40
40
  { project_number: item_ref[1], part_number: item_ref[2] }
41
41
  end
42
42
 
43
+ def get_contributors(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
44
+ name = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
45
+ entity = IsoBibItem::Organization.new name: name, abbreviation: name
46
+ [{ entity: entity, roles: ['publisher'] }]
47
+ end
48
+
49
+
43
50
  # @param doc [Nokogiri::HTML::Document]
44
51
  # @return [Array<Hash>]
45
52
  # * :title_intro [String]
data/lib/gbbib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gbbib
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.4'
5
5
  end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Workers pool.
4
+ class WorkersPool
5
+ attr_accessor :nb_hits
6
+
7
+ def initialize(num_workers = 2)
8
+ @num_workers = num_workers < 2 ? 2 : num_workers
9
+ @queue = SizedQueue.new(num_workers * 2)
10
+ @result = []
11
+ @nb_hits = 0
12
+ end
13
+
14
+ def worker(&block)
15
+ @threads = Array.new @num_workers do
16
+ Thread.new do
17
+ until (item = @queue.pop) == :END
18
+ @result << yield(item) if block
19
+ end
20
+ end
21
+ end
22
+ end
23
+
24
+ def result
25
+ @threads.each(&:join)
26
+ @result
27
+ end
28
+
29
+ def <<(item)
30
+ @queue << item
31
+ self
32
+ end
33
+
34
+ def end
35
+ @num_workers.times { @queue << :END }
36
+ end
37
+
38
+ def size
39
+ @result.size
40
+ end
41
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gbbib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-02 00:00:00.000000000 Z
11
+ date: 2018-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -154,6 +154,7 @@ files:
154
154
  - lib/gbbib/sec_scrapper.rb
155
155
  - lib/gbbib/t_scrapper.rb
156
156
  - lib/gbbib/version.rb
157
+ - lib/gbbib/workers_pool.rb
157
158
  - lib/gbbib/yaml/prefixes.yaml
158
159
  homepage: https://github.com/riboseinc/gdbib
159
160
  licenses:
@@ -175,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
176
  version: '0'
176
177
  requirements: []
177
178
  rubyforge_project:
178
- rubygems_version: 2.7.6
179
+ rubygems_version: 2.6.12
179
180
  signing_key:
180
181
  specification_version: 4
181
182
  summary: 'GdBib: retrieve Chinese GB Standards for bibliographic use using the BibliographicItem