RubyGems - gbbib - Versions diffs - 0.4.2 → 0.4.3 - Mend

gbbib 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 6eac6d0000a21b3dc52113ebda470cb6be672d2088e033648e090765559f0684
-  data.tar.gz: 7fb3b022408232970d3812713d14de9f32eb8ed5c3de9cec00533effd188e5c0
+  metadata.gz: 58ab839578d5b3328e69d578d2eedd6f5d4ffd50d335640166c036e14ef1529a
+  data.tar.gz: 255fad3a6567026a7e9d84ed2b4cec4adca96338f55f5f2b6c615cd8770055d9
 SHA512:
-  metadata.gz: e640701864d9f49b65a8b2842a3e6a29d0a6f4178f6598266cc95f690f9ac022fb4329a8e727e1c86d66369a64ab1e881523e51b30b2d1555f1af25a2bb88dd1
-  data.tar.gz: 13ec0e6d6e4c15a3a3475fccf541bb3ab37027bfa1f712e544fb55d58d96e4285eb6d9fc6a6e8f972352063a3ee30b3ea6e1bfe4fa7fc5f771d647874d801b20
+  metadata.gz: ff92e79b4036741e3b14265b164ef4d425fb4364428ee0c7d44dfcbcc76c8afd41f22613f69bf5f329c486149ed10d72e554f595ab99c0e05bb9d3bc899a9450
+  data.tar.gz: 56a556ca5591226870f02938348c5b256c536cecd062d2f430a466bcdb3b50cefe07384fbb3b08cea771a07e6073a02c73828c45c4077c88d1bbd350d6764eb2

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    gbbib (0.4.2)
+    gbbib (0.4.3)
       cnccs (~> 0.1.1)
       gb-agencies (~> 0.0.1)
       iso-bib-item (~> 0.4.2)

data/lib/gbbib/gb_scrapper.rb CHANGED Viewed

@@ -18,22 +18,30 @@ module Gbbib
      # @param text [String] code of standard for search
       # @return [Gbbib::HitCollection]
       def scrape_page(text)
-        search_html = OpenURI.open_uri(
-          'http://www.std.gov.cn/search/stdPage?q=' + text
-        )
-        result = Nokogiri::HTML search_html
-        hits = result.css('.s-title a').map do |h|
-          Hit.new pid: h[:pid], title: h.text, scrapper: self
+        begin
+          search_html = OpenURI.open_uri(
+            'http://www.std.gov.cn/search/stdPage?q=' + text
+          )
+          result = Nokogiri::HTML search_html
+          hits = result.css('.s-title a').map do |h|
+            Hit.new pid: h[:pid], title: h.text, scrapper: self
+          end
+          HitCollection.new hits
+        rescue
+          warn "Cannot access http://www.std.gov.cn/search/stdPage"
         end
-        HitCollection.new hits
       end
      # @param pid [String] standard's page id
       # @return [Gbbib::GbBibliographicItem]
       def scrape_doc(pid)
         src = 'http://www.std.gov.cn/gb/search/gbDetailed?id=' + pid
-        doc = Nokogiri::HTML OpenURI.open_uri(src)
-        GbBibliographicItem.new scrapped_data(doc, src: src)
+        begin
+          doc = Nokogiri::HTML OpenURI.open_uri(src)
+          GbBibliographicItem.new scrapped_data(doc, src: src)
+        rescue
+          warn "Cannot access http://www.std.gov.cn/search/stdPage"
+        end
       end
       # @param doc [Nokogiri::HTML]
@@ -42,7 +50,7 @@ module Gbbib
       #   * :name [String]
       def get_committee(doc)
         name = doc.xpath('//p/a[1]/following-sibling::text()').text
-                  .match(/(?<=（)[^）]+/).to_s
+          .match(/(?<=（)[^）]+/).to_s
         { type: 'technical', name: name }
       end
     end

data/lib/gbbib/sec_scrapper.rb CHANGED Viewed

@@ -19,11 +19,15 @@ module Gbbib
       # @return [Gbbib::HitCollection]
       def scrape_page(text)
         uri = URI "http://www.std.gov.cn/hb/search/hbPage?searchText=#{text}"
-        res = JSON.parse Net::HTTP.get(uri)
-        hits = res['rows'].map do |r|
-          Hit.new pid: r['id'], title: r['STD_CODE'], scrapper: self
+        begin
+          res = JSON.parse Net::HTTP.get(uri)
+          hits = res['rows'].map do |r|
+            Hit.new pid: r['id'], title: r['STD_CODE'], scrapper: self
+          end
+          HitCollection.new hits
+        rescue
+          warn "Cannot access #{uri}"
         end
-        HitCollection.new hits
       end
       # @param pid [String] standard's page id
@@ -31,8 +35,12 @@ module Gbbib
       def scrape_doc(pid)
         src = "http://www.std.gov.cn/hb/search/stdHBDetailed?id=#{pid}"
         page_uri = URI src
-        doc = Nokogiri::HTML Net::HTTP.get(page_uri)
-        GbBibliographicItem.new scrapped_data(doc, src: src)
+        begin
+          doc = Nokogiri::HTML Net::HTTP.get(page_uri)
+          GbBibliographicItem.new scrapped_data(doc, src: src)
+        rescue
+          warn "Cannot access #{src}"
+        end
       end
       private

data/lib/gbbib/t_scrapper.rb CHANGED Viewed

@@ -18,18 +18,22 @@ module Gbbib
       # @param text [String]
       # @return [Gbbib::HitCollection]
       def scrape_page(text)
-        search_html = OpenURI.open_uri(
-          'http://www.ttbz.org.cn/Home/Standard?searchType=2&key=' +
-          CGI.escape(text.tr('-', [8212].pack('U')))
-        )
-        header = Nokogiri::HTML search_html
-        xpath = '//table[contains(@class, "standard_list_table")]/tr/td/a'
-        t_xpath = '../preceding-sibling::td[3]'
-        hits = header.xpath(xpath).map do |h|
-          title = h.at(t_xpath).text.gsub(/â\u0080\u0094/, '-')
-          Hit.new pid: h[:href].sub(%r{\/$}, ''), title: title, scrapper: self
+        begin
+          search_html = OpenURI.open_uri(
+            'http://www.ttbz.org.cn/Home/Standard?searchType=2&key=' +
+            CGI.escape(text.tr('-', [8212].pack('U')))
+          )
+          header = Nokogiri::HTML search_html
+          xpath = '//table[contains(@class, "standard_list_table")]/tr/td/a'
+          t_xpath = '../preceding-sibling::td[3]'
+          hits = header.xpath(xpath).map do |h|
+            title = h.at(t_xpath).text.gsub(/â\u0080\u0094/, '-')
+            Hit.new pid: h[:href].sub(%r{\/$}, ''), title: title, scrapper: self
+          end
+          HitCollection.new hits
+        rescue
+          warn "Cannot connect to #{http://www.ttbz.org.cn/Home/Standard}"
         end
-        HitCollection.new hits
       end
       # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
@@ -37,8 +41,12 @@ module Gbbib
       # @return [Gbbib::GbBibliographicItem]
       def scrape_doc(pid)
         src = "http://www.ttbz.org.cn#{pid}"
-        doc = Nokogiri::HTML OpenURI.open_uri(src), nil, Encoding::UTF_8.to_s
-        GbBibliographicItem.new scrapped_data(doc, src: src)
+        begin
+          doc = Nokogiri::HTML OpenURI.open_uri(src), nil, Encoding::UTF_8.to_s
+          GbBibliographicItem.new scrapped_data(doc, src: src)
+        rescue
+          warn "Cannot connect to #{src}"
+        end
       end
       private
@@ -96,7 +104,7 @@ module Gbbib
       def get_ccs(doc)
         [doc.xpath('//td[contains(.,"中国标准分类号")]/following-sibling::td[1]')
-            .text.gsub(/[\r\n]/, '').strip.match(/^[^\s]+/).to_s]
+          .text.gsub(/[\r\n]/, '').strip.match(/^[^\s]+/).to_s]
       end
       def get_ics(doc)
@@ -108,7 +116,7 @@ module Gbbib
       def get_dates(doc)
         d = doc.xpath('//td[contains(.,"发布日期")]/following-sibling::td[1]/span')
-               .text.match(/(?<y>\d{4})[^\d]+(?<m>\d{2})[^\d]+(?<d>\d{2})/)
+          .text.match(/(?<y>\d{4})[^\d]+(?<m>\d{2})[^\d]+(?<d>\d{2})/)
         [{ type: 'published', on: "#{d[:y]}-#{d[:m]}-#{d[:d]}" }]
       end
     end

data/lib/gbbib/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Gbbib
-  VERSION = '0.4.2'
+  VERSION = '0.4.3'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gbbib
 version: !ruby/object:Gem::Version
-  version: 0.4.2
+  version: 0.4.3
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-11-27 00:00:00.000000000 Z
+date: 2018-11-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler