isobib 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.rubocop.yml +4 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +66 -0
- data/README.adoc +172 -0
- data/Rakefile +6 -0
- data/bin/bundle +105 -0
- data/bin/byebug +29 -0
- data/bin/coderay +29 -0
- data/bin/console +14 -0
- data/bin/htmldiff +29 -0
- data/bin/httpclient +29 -0
- data/bin/ldiff +29 -0
- data/bin/nokogiri +29 -0
- data/bin/pry +29 -0
- data/bin/rake +29 -0
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/isobib.gemspec +40 -0
- data/lib/isobib/hit.rb +55 -0
- data/lib/isobib/hit_collection.rb +42 -0
- data/lib/isobib/hit_pages.rb +94 -0
- data/lib/isobib/iso_bibliography.rb +24 -0
- data/lib/isobib/scrapper.rb +378 -0
- data/lib/isobib/version.rb +5 -0
- data/lib/isobib/workers_pool.rb +41 -0
- data/lib/isobib.rb +4 -0
- metadata +174 -0
data/bin/pry
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'pry' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"

# Default BUNDLE_GEMFILE to the Gemfile two levels above this script's real
# (symlink-resolved) path, unless the caller has already set it.
unless ENV["BUNDLE_GEMFILE"]
  script_path = Pathname.new(__FILE__).realpath
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile", script_path)
end

bundler_stub = File.expand_path("../bundle", __FILE__)

# A sibling bin/bundle is only loaded when its first 150 bytes carry the
# Bundler marker; otherwise the script aborts.
if File.file?(bundler_stub)
  unless File.read(bundler_stub, 150) =~ /This file was generated by Bundler/
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.\nReplace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end

  load(bundler_stub)
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("pry", "pry"))
|
data/bin/rake
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rake' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"

# Default BUNDLE_GEMFILE to the Gemfile two levels above this script's real
# (symlink-resolved) path, unless the caller has already set it.
unless ENV["BUNDLE_GEMFILE"]
  script_path = Pathname.new(__FILE__).realpath
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile", script_path)
end

bundler_stub = File.expand_path("../bundle", __FILE__)

# A sibling bin/bundle is only loaded when its first 150 bytes carry the
# Bundler marker; otherwise the script aborts.
if File.file?(bundler_stub)
  unless File.read(bundler_stub, 150) =~ /This file was generated by Bundler/
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.\nReplace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end

  load(bundler_stub)
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("rake", "rake"))
|
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
# frozen_string_literal: true

#
# This file was generated by Bundler.
#
# The application 'rspec' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"

# Default BUNDLE_GEMFILE to the Gemfile two levels above this script's real
# (symlink-resolved) path, unless the caller has already set it.
unless ENV["BUNDLE_GEMFILE"]
  script_path = Pathname.new(__FILE__).realpath
  ENV["BUNDLE_GEMFILE"] = File.expand_path("../../Gemfile", script_path)
end

bundler_stub = File.expand_path("../bundle", __FILE__)

# A sibling bin/bundle is only loaded when its first 150 bytes carry the
# Bundler marker; otherwise the script aborts.
if File.file?(bundler_stub)
  unless File.read(bundler_stub, 150) =~ /This file was generated by Bundler/
    abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.\nReplace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
  end

  load(bundler_stub)
end

require "rubygems"
require "bundler/setup"

load(Gem.bin_path("rspec-core", "rspec"))
|
data/bin/setup
ADDED
data/isobib.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Put lib/ on the load path so the version constant can be required below.
lib_dir = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'isobib/version'

Gem::Specification.new do |spec|
  # Summary and description intentionally carry the same text.
  blurb = 'IsoBib: retrieve ISO Standards for bibliographic use '\
          'using the BibliographicItem model'

  spec.name        = 'isobib'
  spec.version     = Isobib::VERSION
  spec.authors     = ['Ribose Inc.']
  spec.email       = ['open.source@ribose.com']

  spec.summary     = blurb
  spec.description = blurb

  spec.homepage    = 'https://github.com/riboseinc/isobib'
  spec.license     = 'MIT'

  # Ship every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject do |path|
    path.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = 'exe'
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Development-only dependencies, as [name, optional version constraint].
  [
    ['bundler', '~> 1.16'],
    ['pry-byebug'],
    ['rake', '~> 10.0'],
    ['rspec', '~> 3.0'],
    ['simplecov']
  ].each { |dependency| spec.add_development_dependency(*dependency) }

  # Runtime dependencies.
  %w[algoliasearch iso-bib-item].each do |gem_name|
    spec.add_dependency gem_name
  end
  # spec.add_dependency 'isoics'
  # spec.add_dependency 'nokogiri'
  # spec.add_dependency "capybara"
  # spec.add_dependency "poltergeist"
end
|
data/lib/isobib/hit.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Isobib
  # A single search hit: the raw result hash plus a lazily fetched,
  # memoized bibliographic item parsed from it.
  class Hit
    # @return [Isobib::HitCollection, nil] the page this hit belongs to
    attr_reader :hit_collection

    # @return [Hash] the raw search result this hit wraps
    attr_reader :hit

    # @param hit [Hash]
    # @param hit_collection [Isobib::HitCollection, nil]
    def initialize(hit, hit_collection = nil)
      @hit            = hit
      @hit_collection = hit_collection
    end

    # Parse page. The result of Scrapper.parse_page is memoized, so the
    # scrapper is only invoked again while the cached value is nil/false.
    # @return [Isobib::IsoBibliographicItem]
    def fetch
      @fetch ||= Scrapper.parse_page @hit
    end

    # @return [String]
    def to_s
      inspect
    end

    # Human-readable summary. It never triggers a fetch: the identifier is
    # only shown when the item has already been fetched.
    # @return [String]
    def inspect
      "<#{self.class}:#{format('%#.14x', object_id << 1)} "\
      "@text=\"#{@hit_collection&.hit_pages&.text}\" "\
      "@fullIdentifier=\"#{@fetch&.shortref}\" "\
      "@matchedWords=#{matched_words} "\
      "@category=\"#{@hit['category']}\" "\
      "@title=\"#{@hit['title']}\">"
    end

    # Serialize the fetched item as XML.
    # With a builder, the item is appended to it and the result of the
    # item's own to_xml is returned. Without one, a fresh UTF-8 Nokogiri
    # builder is used and the root element is returned as a String.
    # @param builder [Nokogiri::XML::Builder, nil]
    # @param opts [Hash] passed through to the item's to_xml
    # @return [String]
    def to_xml(builder = nil, opts = {})
      return fetch.to_xml(builder, opts) if builder

      document = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
        fetch.to_xml xml, opts
      end
      document.doc.root.to_xml
    end

    private

    # Unique matched words gathered from every '_highlightResult' entry.
    # A missing '_highlightResult' key or a missing 'matchedWords' list is
    # treated as empty, so inspect/to_s do not raise on sparse hits.
    # @return [Array<String>]
    def matched_words
      highlights = @hit['_highlightResult'] || {}
      highlights.each_value.flat_map { |entry| entry['matchedWords'] || [] }.uniq
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'isobib/hit'
|
4
|
+
|
5
|
+
module Isobib
  # One page of search results, stored as an Array of Isobib::Hit.
  class HitCollection < Array
    # @return [TrueClass, FalseClass] true once #fetch has completed
    attr_reader :fetched

    # @return [Isobib::HitPages] the paged result set this page belongs to
    attr_reader :hit_pages

    # Wraps every raw hit in an Isobib::Hit that points back at this page.
    # @param hits [Array<Hash>]
    # @param hit_pages [Isobib::HitPages, nil]
    def initialize(hits, hit_pages = nil)
      wrapped = hits.map { |raw_hit| Hit.new(raw_hit, self) }
      concat(wrapped)
      @hit_pages = hit_pages
      @fetched   = false
    end

    # Fetches every hit on this page through a WorkersPool of 4 workers,
    # then marks the page as fetched.
    # @return [Isobib::HitCollection] self
    def fetch
      pool = WorkersPool.new(4)
      pool.worker(&:fetch)
      each { |hit| pool << hit }
      pool.end
      pool.result
      @fetched = true
      self
    end

    # @return [String]
    def to_s
      inspect
    end

    # @return [String]
    def inspect
      format('<%s:%#.14x @fetched=%s>', self.class, object_id << 1, @fetched)
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'algoliasearch'
|
4
|
+
require 'isobib/hit_collection'
|
5
|
+
|
6
|
+
module Isobib
  # Pages of hits for one search text.
  #
  # The first page is requested by the constructor; further pages are
  # requested on demand and cached in the underlying Array, so that slot
  # +n+ holds result page +n+. Note that #size, #each and #map are driven
  # by the page count reported by the search, not by the number of pages
  # loaded so far.
  class HitPages < Array
    # NOTE(review): credentials are hard-coded here and applied at class
    # load time — presumably a public, search-only key; confirm.
    Algolia.init application_id: 'JCL49WV5AR',
                 api_key: 'dd1b9e1ab383f4d4817d29cd5e96d3f0'

    # @return [String] the search text
    attr_reader :text

    # Runs the search and stores the first page of results.
    # @param text [String]
    def initialize(text)
      @text = text
      @index = Algolia::Index.new 'all_en'
      resp = @index.search(text, facetFilters: ['category:standard'])
      @nb_pages = resp['nbPages']
      self << HitCollection.new(resp['hits'], self)
    end

    # @return [Isobib::HitCollection] the last page, loading it if needed
    def last
      collection(@nb_pages - 1)
    end

    # @param idx [Integer] zero-based page index
    # @return [Isobib::HitCollection, nil] nil when idx is past the last page
    def [](idx)
      return if idx + 1 > @nb_pages
      collection idx
      super
    end

    # Yields every page in order, loading pages as needed.
    # @return [Array] the block results; empty when no block is given
    def map(&block)
      m = []
      @nb_pages.times do |n|
        m << yield(self[n]) if block
      end
      m
    end

    # Yields every page in order, loading pages as needed.
    # @return [Integer] the page count
    def each(&block)
      @nb_pages.times do |n|
        yield self[n] if block
      end
    end

    # @return [String]
    def to_s
      inspect
    end

    # @return [String]
    def inspect
      "<#{self.class}:#{format('%#.14x', object_id << 1)} @text=#{@text} "\
      "@pages=#{@nb_pages}>"
    end

    # @return [Integer] the page count reported by the search
    def size
      @nb_pages
    end

    # Fetches every hit of every page and serializes them inside a single
    # <documents> root element.
    # @return [String]
    def to_xml
      builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
        xml.documents do
          each do |page|
            page.fetch
            page.each { |hit| hit.to_xml xml }
          end
        end
      end
      builder.to_xml
    end

    private

    # Returns page +idx+, first loading every page up to and including it.
    #
    # Pages are requested strictly in order, one request per missing slot,
    # each asking for the next unloaded page. Asking for +idx+ on every
    # pass (as an earlier version did) filled the intermediate slots with
    # copies of page +idx+ whenever a caller skipped ahead.
    #
    # @param idx [Integer] zero-based page index
    # @return [Isobib::HitCollection, nil] nil when idx is past the last page
    def collection(idx)
      return if idx + 1 > @nb_pages

      # #size is overridden to report the total page count, so the number of
      # pages actually loaded has to come from Array's own implementation.
      loaded_count = Array.instance_method(:size).bind(self)
      while (next_page = loaded_count.call) < idx + 1
        resp = @index.search(@text,
                             facetFilters: ['category:standard'],
                             page: next_page)
        self << HitCollection.new(resp['hits'], self)
      end
      Array.instance_method(:[]).bind(self).call idx
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# require 'isobib/iso_bibliographic_item'
|
4
|
+
require 'isobib/scrapper'
|
5
|
+
require 'isobib/hit_pages'
|
6
|
+
|
7
|
+
module Isobib
  # Class-level entry points for searching ISO standards.
  class IsoBibliography
    # Builds the paged result set for the given search text.
    # @param text [String]
    # @return [Isobib::HitPages]
    def self.search(text)
      HitPages.new(text)
    end

    # Delegates the search text to Scrapper.get.
    # @param text [String]
    # @return [Array<IsoBibliographicItem>]
    def self.search_and_fetch(text)
      Scrapper.get text
    end
  end
end
|