gbbib 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 2967105032707b9f8b1232f232874383d178cac8e37e81564c3f4703706bb042
4
- data.tar.gz: 4fe03b886a4af769c90de2100d05d4dbf539b47577f5992b9ad6559f9c2632cb
2
+ SHA1:
3
+ metadata.gz: eb3b926e47fdfca5623a37e092d8072be696a8fe
4
+ data.tar.gz: 781ce62f34f01ea01ce1b1bea083ec851beb56e8
5
5
  SHA512:
6
- metadata.gz: 987c2fdf13dfebb7153d14faa2db1a3aabab8905f2d60978b5d23587a774613884e3509b8047590993c43342b8c7a88c98189b4974c4edc3f3ba06b3da84dec3
7
- data.tar.gz: 25be12092031186450f44218b40dfb50d660b7aeadd24da1130b4f190b38c504572156a6550b0f20ce5eb443e6eab428221227ffe7cd61d61398ed2097faa0fd
6
+ metadata.gz: c19520eb951344e11564dfba80bc16111ef331dc5b46e8bb37a31b1d3892ad56165ed90495eb82a1158e1034f6e05800dc7d40feced12c74f2e067a50e7ce173
7
+ data.tar.gz: 4b75cfa4a98a855a83800d7c84c207997f008a5d5ab80a655fa5d2098be19ae248dd255afce983b069a7d27d997aa1ac411cd26d26f5c7c31a327f11069f5776
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- gbbib (0.1.2)
4
+ gbbib (0.1.4)
5
5
  cnccs
6
6
  iso-bib-item
7
7
 
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'gbbib/workers_pool'
4
+
3
5
  # GB bib module.
4
6
  module Gbbib
5
7
  # GB entry point class.
@@ -7,7 +9,7 @@ module Gbbib
7
9
  class << self
8
10
  # rubocop:disable Metrics/MethodLength
9
11
  # @param text [String] code of standard for search
10
- # @return [Gbbib::Hits]
12
+ # @return [Gbbib::HitCollection]
11
13
  def search(text)
12
14
  if text.match?(/^(GB|GJ|GS)/)
13
15
  # Scrape national standards.
@@ -30,6 +32,124 @@ module Gbbib
30
32
  end
31
33
  end
32
34
  # rubocop:enable Metrics/MethodLength
35
+
36
+ # @param code [String] the GB standard code to look up (e.g. "GB/T 20223")
37
+ # @param year [String] the year the standard was published (optional)
38
+ # @param opts [Hash] options; restricted to :all_parts if all-parts reference is required
39
+ # @return [String] Relaton XML serialisation of reference
40
+ def get(code, year, opts)
41
+ return iev.to_xml if code.casecmp? 'IEV'
42
+ code += '.1' if opts[:all_parts]
43
+ ret = get1(code, year, opts)
44
+ return nil if ret.nil?
45
+ ret.to_most_recent_reference unless year
46
+ ret.to_all_parts if opts[:all_parts]
47
+ ret.to_xml
48
+ end
49
+
50
+ private
51
+
52
+ def fetch_ref_err(code, year, missed_years)
53
+ id = year ? "#{code}:#{year}" : code
54
+ warn "WARNING: no match found on the ISO website for #{id}. "\
55
+ "The code must be exactly like it is on the website."
56
+ warn "(There was no match for #{year}, though there were matches "\
57
+ "found for #{missed_years.join(', ')}.)" unless missed_years.empty?
58
+ if /\d-\d/.match? code
59
+ warn "The provided document part may not exist, or the document "\
60
+ "may no longer be published in parts."
61
+ else
62
+ warn "If you wanted to cite all document parts for the reference, "\
63
+ "use \"#{code} (all parts)\".\nIf the document is not a standard, "\
64
+ "use its document type abbreviation (TS, TR, PAS, Guide)."
65
+ end
66
+ nil
67
+ end
68
+
69
+ def get1(code, year, opts)
70
+ return iev if code.casecmp? "IEV"
71
+ result = search_filter(code) or return nil
72
+ ret = results_filter(result, year)
73
+ return ret[:ret] if ret[:ret]
74
+ fetch_ref_err(code, year, ret[:years])
75
+ end
76
+
77
+ def search_filter(code)
78
+ docidrx = %r{^[^\s]+\s[\d\.]+}
79
+ # corrigrx = %r{^[^\s]+\s[\d\.]+-[0-9]+/}
80
+ warn "fetching #{code}..."
81
+ result = search(code)
82
+ ret = result.select do |hit|
83
+ hit.title && hit.title.match(docidrx).to_s == code # &&
84
+ # !corrigrx.match?(hit.title)
85
+ end
86
+ return ret unless ret.empty?
87
+ []
88
+ end
89
+
90
+ # Sort through the results from Isobib, fetching them three at a time,
91
+ # and return the first result that matches the code,
92
+ # matches the year (if provided), and which has a title (amendments do not).
93
+ # Only expects the first page of results to be populated.
94
+ # Does not match corrigenda etc (e.g. ISO 3166-1:2006/Cor 1:2007)
95
+ # If no match, returns any years which caused mismatch, for error reporting
96
+ def results_filter(result, year)
97
+ missed_years = []
98
+ result.each_slice(3) do |s| # ISO website only allows 3 connections
99
+ fetch_pages(s, 3).each_with_index do |r, i|
100
+ return { ret: r } if !year
101
+ r.dates.select { |d| d.type == "published" }.each do |d|
102
+ return { ret: r } if year.to_i == d.on.year
103
+ missed_years << d.on.year
104
+ end
105
+ end
106
+ end
107
+ { years: missed_years }
108
+ end
109
+
110
+ def fetch_pages(s, n)
111
+ workers = WorkersPool.new n
112
+ workers.worker { |w| { i: w[:i], hit: w[:hit].fetch } }
113
+ s.each_with_index { |hit, i| workers << { i: i, hit: hit } }
114
+ workers.end
115
+ workers.result.sort { |x, y| x[:i] <=> y[:i] }.map { |x| x[:hit] }
116
+ end
117
+
118
+ def iev
119
+ Nokogiri::XML.fragment(<<~"END")
120
+ <bibitem type="international-standard" id="IEV">
121
+ <title format="text/plain" language="en" script="Latn">Electropedia: The World's Online Electrotechnical Vocabulary</title>
122
+ <source type="src">http://www.electropedia.org</source>
123
+ <docidentifier>IEV</docidentifier>
124
+ <date type="published"> <on>#{Date.today.year}</on> </date>
125
+ <contributor>
126
+ <role type="publisher"/>
127
+ <organization>
128
+ <name>International Electrotechnical Commission</name>
129
+ <abbreviation>IEC</abbreviation>
130
+ <uri>www.iec.ch</uri>
131
+ </organization>
132
+ </contributor>
133
+ <language>en</language> <language>fr</language>
134
+ <script>Latn</script>
135
+ <copyright>
136
+ <from>#{Date.today.year}</from>
137
+ <owner>
138
+ <organization>
139
+ <name>International Electrotechnical Commission</name>
140
+ <abbreviation>IEC</abbreviation>
141
+ <uri>www.iec.ch</uri>
142
+ </organization>
143
+ </owner>
144
+ </copyright>
145
+ <relation type="updates">
146
+ <bibitem>
147
+ <formattedref>IEC 60050</formattedref>
148
+ </bibitem>
149
+ </relation>
150
+ </bibitem>
151
+ END
152
+ end
33
153
  end
34
154
  end
35
155
  end
data/lib/gbbib/hit.rb CHANGED
@@ -43,6 +43,8 @@ module Gbbib
43
43
  "@title=\"#{title}\">"
44
44
  end
45
45
 
46
+ # @param builder [Nokogiri::XML::Builder]
47
+ # @param opts [Hash]
46
48
  # @return [String]
47
49
  def to_xml(builder = nil, opts = {})
48
50
  if builder
@@ -21,25 +21,25 @@ module Gbbib
21
21
  @hit_pages = hit_pages
22
22
  end
23
23
 
24
- # @return [Isobib::HitCollection]
25
- # def fetch
26
- # workers = WorkersPool.new 4
27
- # workers.worker(&:fetch)
28
- # each do |hit|
29
- # workers << hit
30
- # end
31
- # workers.end
32
- # workers.result
33
- # @fetched = true
34
- # self
35
- # end
24
+ # @return [Gbbib::HitCollection]
25
+ def fetch
26
+ workers = WorkersPool.new 4
27
+ workers.worker(&:fetch)
28
+ each do |hit|
29
+ workers << hit
30
+ end
31
+ workers.end
32
+ workers.result
33
+ @fetched = true
34
+ self
35
+ end
36
36
 
37
- # def to_s
38
- # inspect
39
- # end
40
- #
41
- # def inspect
42
- # "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
43
- # end
37
+ def to_s
38
+ inspect
39
+ end
40
+
41
+ def inspect
42
+ "<#{self.class}:#{format('%#.14x', object_id << 1)} @fetched=#{@fetched}>"
43
+ end
44
44
  end
45
45
  end
@@ -14,18 +14,19 @@ module Gbbib
14
14
  # @return [Hash]
15
15
  def scrapped_data(doc, src:)
16
16
  {
17
- committee: get_committee(doc),
18
- docid: get_docid(doc),
19
- titles: get_titles(doc),
20
- type: get_type(doc),
21
- docstatus: get_status(doc),
22
- gbtype: get_gbtype(doc),
23
- ccs: get_ccs(doc),
24
- ics: get_ics(doc),
25
- link: [{ type: 'src', content: src }],
26
- dates: get_dates(doc),
27
- language: ['zh'],
28
- script: ['Hans']
17
+ committee: get_committee(doc),
18
+ docid: get_docid(doc),
19
+ titles: get_titles(doc),
20
+ contributors: get_contributors(doc),
21
+ type: get_type(doc),
22
+ docstatus: get_status(doc),
23
+ gbtype: get_gbtype(doc),
24
+ ccs: get_ccs(doc),
25
+ ics: get_ics(doc),
26
+ link: [{ type: 'src', content: src }],
27
+ dates: get_dates(doc),
28
+ language: ['zh'],
29
+ script: ['Hans']
29
30
  }
30
31
  end
31
32
  # rubocop:enable Metrics/MethodLength
@@ -35,11 +36,17 @@ module Gbbib
35
36
  # * :project_number [String]
36
37
  # * :part_number [String]
37
38
  def get_docid(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
38
- item_ref = doc.xpath(xpt)
39
- .text.match(/(?<=\s)(\d+)-?((?<=-)\d+|)/)
39
+ item_ref = doc.xpath(xpt).text.match(/(?<=\s)(\d+)\.?((?<=\.)\d+|)/)
40
40
  { project_number: item_ref[1], part_number: item_ref[2] }
41
41
  end
42
42
 
43
+ def get_contributors(doc, xpt = '//dt[text()="标准号"]/following-sibling::dd[1]')
44
+ name = doc.xpath(xpt).text.match(/^[^\s]+/).to_s
45
+ entity = IsoBibItem::Organization.new name: name, abbreviation: name
46
+ [{ entity: entity, roles: ['publisher'] }]
47
+ end
48
+
49
+
43
50
  # @param doc [Nokogiri::HTML::Document]
44
51
  # @return [Array<Hash>]
45
52
  # * :title_intro [String]
data/lib/gbbib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gbbib
4
- VERSION = '0.1.3'
4
+ VERSION = '0.1.4'
5
5
  end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Workers pool.
4
+ class WorkersPool
5
+ attr_accessor :nb_hits
6
+
7
+ def initialize(num_workers = 2)
8
+ @num_workers = num_workers < 2 ? 2 : num_workers
9
+ @queue = SizedQueue.new(num_workers * 2)
10
+ @result = []
11
+ @nb_hits = 0
12
+ end
13
+
14
+ def worker(&block)
15
+ @threads = Array.new @num_workers do
16
+ Thread.new do
17
+ until (item = @queue.pop) == :END
18
+ @result << yield(item) if block
19
+ end
20
+ end
21
+ end
22
+ end
23
+
24
+ def result
25
+ @threads.each(&:join)
26
+ @result
27
+ end
28
+
29
+ def <<(item)
30
+ @queue << item
31
+ self
32
+ end
33
+
34
+ def end
35
+ @num_workers.times { @queue << :END }
36
+ end
37
+
38
+ def size
39
+ @result.size
40
+ end
41
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gbbib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-07-02 00:00:00.000000000 Z
11
+ date: 2018-07-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -154,6 +154,7 @@ files:
154
154
  - lib/gbbib/sec_scrapper.rb
155
155
  - lib/gbbib/t_scrapper.rb
156
156
  - lib/gbbib/version.rb
157
+ - lib/gbbib/workers_pool.rb
157
158
  - lib/gbbib/yaml/prefixes.yaml
158
159
  homepage: https://github.com/riboseinc/gdbib
159
160
  licenses:
@@ -175,7 +176,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
176
  version: '0'
176
177
  requirements: []
177
178
  rubyforge_project:
178
- rubygems_version: 2.7.6
179
+ rubygems_version: 2.6.12
179
180
  signing_key:
180
181
  specification_version: 4
181
182
  summary: 'GdBib: retrieve Chinese GB Standards for bibliographic use using the BibliographicItem