extcite 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 59c11ddc6a3c2055e6d32941cf1e5227d759d8bb
4
+ data.tar.gz: 8bb4d062337f6caf5a272cca33900476ff29049c
5
+ SHA512:
6
+ metadata.gz: 8319b4a0eaacadc82b97d3780df98ae6669b3d62bb017f348dd32c30b09d433ae5b610afeae9d4566f2714b2681dd57526b826819872182c108dafe6a1b780b5
7
+ data.tar.gz: e26c399bf22cd9e498bf302ae2b484238aed579b6994a8f4e9d81c0da949b60b6b694681459c02af611d0c5664b5d37d2c1f89e071eea86e43c15f13487413f6
data/.gitignore ADDED
@@ -0,0 +1,36 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ #Gemfile.lock
30
+ .ruby-version
31
+ .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
35
+
36
+ cache/
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ cache: bundler
3
+ rvm:
4
+ - 2.1.7
5
+ - 2.2.3
6
+ - 2.2.4
7
+ - 2.3.1
8
+ script:
9
+ bundle exec rake test TESTOPTS="-v"
data/CHANGELOG.md ADDED
@@ -0,0 +1,14 @@
1
+ ## 0.1.0 (2017-04-06)
2
+
3
+ * First version to Rubygems
4
+
5
+ ## 0.0.9 (2016-06-17)
6
+
7
+ * battle tested more, fixed a number of bugs
8
+ * now works with arxiv papers
9
+ * now works with biorxiv, or at least should
10
+ * improved extraction of DOIs
11
+
12
+ ## 0.0.1 (2016-06-07)
13
+
14
+ * just started, :)
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,81 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ extcite (0.1.0)
5
+ bibtex-ruby (~> 4.4)
6
+ faraday (~> 0.12.0.1)
7
+ faraday_middleware (~> 0.11.0.1)
8
+ oga (~> 2.2)
9
+ pdf-reader (~> 2.0)
10
+ serrano (~> 0.3.6)
11
+ thor (~> 0.19.4)
12
+
13
+ GEM
14
+ remote: https://rubygems.org/
15
+ specs:
16
+ Ascii85 (1.0.2)
17
+ afm (0.2.2)
18
+ ansi (1.5.0)
19
+ ast (2.3.0)
20
+ bibtex-ruby (4.4.3)
21
+ latex-decode (~> 0.0)
22
+ codecov (0.1.10)
23
+ json
24
+ simplecov
25
+ url
26
+ docile (1.1.5)
27
+ faraday (0.12.0.1)
28
+ multipart-post (>= 1.2, < 3)
29
+ faraday_middleware (0.11.0.1)
30
+ faraday (>= 0.7.4, < 1.0)
31
+ hashery (2.1.2)
32
+ json (2.0.3)
33
+ latex-decode (0.2.2)
34
+ unicode (~> 0.4)
35
+ multi_json (1.12.1)
36
+ multipart-post (2.0.0)
37
+ oga (2.9)
38
+ ast
39
+ ruby-ll (~> 2.1)
40
+ pdf-reader (2.0.0)
41
+ Ascii85 (~> 1.0.0)
42
+ afm (~> 0.2.1)
43
+ hashery (~> 2.0)
44
+ ruby-rc4
45
+ ttfunk
46
+ power_assert (1.0.1)
47
+ rake (12.0.0)
48
+ ruby-ll (2.1.2)
49
+ ansi
50
+ ast
51
+ ruby-rc4 (0.1.5)
52
+ serrano (0.3.6)
53
+ faraday (~> 0.12.0.1)
54
+ faraday_middleware (~> 0.11.0.1)
55
+ multi_json (~> 1.12, >= 1.12.1)
56
+ thor (~> 0.19.4)
57
+ simplecov (0.14.1)
58
+ docile (~> 1.1.0)
59
+ json (>= 1.8, < 3)
60
+ simplecov-html (~> 0.10.0)
61
+ simplecov-html (0.10.0)
62
+ test-unit (3.2.3)
63
+ power_assert
64
+ thor (0.19.4)
65
+ ttfunk (1.5.0)
66
+ unicode (0.4.4.2)
67
+ url (0.3.2)
68
+
69
+ PLATFORMS
70
+ ruby
71
+
72
+ DEPENDENCIES
73
+ bundler (~> 1.14, >= 1.14.6)
74
+ codecov (~> 0.1.10)
75
+ extcite!
76
+ rake (~> 12.0, >= 12.0.0)
77
+ simplecov (~> 0.14.1)
78
+ test-unit (~> 3.2, >= 3.2.1)
79
+
80
+ BUNDLED WITH
81
+ 1.14.6
data/README.md ADDED
@@ -0,0 +1,71 @@
1
+ extcite
2
+ =======
3
+
4
+ [![gem version](https://img.shields.io/gem/v/extcite.svg)](https://rubygems.org/gems/extcite)
5
+ [![Build Status](https://travis-ci.org/sckott/extcite.svg?branch=master)](https://travis-ci.org/sckott/extcite)
6
+ [![codecov.io](http://codecov.io/github/sckott/extcite/coverage.svg?branch=master)](http://codecov.io/github/sckott/extcite?branch=master)
7
+
8
+ __`extcite` gets DOIs and generates citations for your papers__
9
+
10
+ ## Install
11
+
12
+ ### Release version
13
+
14
+ ```
15
+ gem install extcite
16
+ ```
17
+
18
+ ### Development version
19
+
20
+ ```
21
+ git clone git@github.com:sckott/extcite.git
22
+ cd extcite
23
+ rake install
24
+ ```
25
+
26
+ > if `rake install` fails, try `sudo rake install`. If that fails, open an issue with what `rake install --trace` gives you
27
+
28
+ ## Examples
29
+
30
+ ### Within Ruby
31
+
32
+ ```ruby
33
+ require 'extcite'
34
+ ```
35
+
36
+ #### Search
37
+
38
+ A single paper
39
+
40
+ ```ruby
41
+ require 'net/http'
42
+ File.write("foo.pdf", Net::HTTP.get(URI.parse("https://scottchamberlain.info/pdfs/GuoEtal2015PlosOne.pdf")))
43
+ Extcite.extract(path: 'foo.pdf')
44
+ ```
45
+
46
+ The BibTeX citation is appended to the file given in the `file` param (`out.bib` by default)
47
+
48
+ Many papers at once
49
+
50
+ ```ruby
51
+ Dir.mkdir('bar')
52
+ File.write("bar/foo1.pdf", Net::HTTP.get(URI.parse("https://scottchamberlain.info/pdfs/Chamberlain&Szocs2013F1000Research.pdf")))
53
+ File.write("bar/foo2.pdf", Net::HTTP.get(URI.parse("https://scottchamberlain.info/pdfs/GuoEtal2015PlosOne.pdf")))
54
+ Extcite.extract(path: 'bar')
55
+ ```
56
+
57
+ ### On the CLI
58
+
59
+ All pdfs in the current directory:
60
+
61
+ ```shell
62
+ extcite extract .
63
+ ```
64
+
65
+ Single paper
66
+
67
+ ```shell
68
+ extcite extract foo.pdf
69
+ ```
70
+
71
+ [changelog]: https://github.com/sckott/extcite/blob/master/CHANGELOG.md
data/Rakefile ADDED
@@ -0,0 +1,41 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.libs << "test"
6
+ t.test_files = FileList['test/test-*.rb']
7
+ t.verbose = true
8
+ end
9
+
10
+ desc "Run tests"
11
+ task :default => :test
12
+
13
+ desc "Build extcite docs"
14
+ task :docs do
15
+ system "yardoc"
16
+ end
17
+
18
+ desc "bundle install"
19
+ task :bundle do
20
+ system "bundle install"
21
+ end
22
+
23
+ desc "clean out builds"
24
+ task :clean do
25
+ system "ls | grep [0-9].gem | xargs rm"
26
+ end
27
+
28
+ desc "Build extcite"
29
+ task :build do
30
+ system "gem build extcite.gemspec"
31
+ end
32
+
33
+ desc "Install extcite"
34
+ task :install => [:bundle, :build] do
35
+ system "gem install extcite-#{Extcite::VERSION}.gem"
36
+ end
37
+
38
+ desc "Release to Rubygems"
39
+ task :release => :build do
40
+ system "gem push extcite-#{Extcite::VERSION}.gem"
41
+ end
data/bin/extcite ADDED
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "thor"
4
+
5
+ class Dz < Thor
6
+ include Thor::Actions
7
+ require 'extcite'
8
+
9
+ desc "extract STRING", "Get bib data from PDFs"
10
+ # method_option :path => :string
11
+ def extract(tt)
12
+ tt = "#{tt}"
13
+ Extcite.extract(path: tt)
14
+ end
15
+ end
16
+
17
+ Dz.start(ARGV)
data/extcite.gemspec ADDED
@@ -0,0 +1,36 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'extcite/version'
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = 'extcite'
8
+ s.version = Extcite::VERSION
9
+ s.date = '2017-04-06'
10
+ s.summary = "Citations from PDFs"
11
+ s.description = "Gets DOIS and generates citations for your papers"
12
+ s.authors = "Scott Chamberlain"
13
+ s.email = 'myrmecocystus@gmail.com'
14
+ s.homepage = 'http://github.com/sckott/extcite'
15
+ s.licenses = 'MIT'
16
+
17
+ s.files = `git ls-files -z`.split("\x0").reject {|f| f.match(%r{^(test|spec|features)/}) }
18
+ s.require_paths = ["lib"]
19
+
20
+ s.bindir = 'bin'
21
+ s.executables = ['extcite']
22
+
23
+ s.add_development_dependency 'bundler', '~> 1.14', '>= 1.14.6'
24
+ s.add_development_dependency 'rake', '~> 12.0', '>= 12.0.0'
25
+ s.add_development_dependency 'test-unit', '~> 3.2', '>= 3.2.1'
26
+ s.add_development_dependency 'simplecov', '~> 0.14.1'
27
+ s.add_development_dependency 'codecov', '~> 0.1.10'
28
+
29
+ s.add_runtime_dependency 'faraday', '~> 0.12.0.1'
30
+ s.add_runtime_dependency 'faraday_middleware', '~> 0.11.0.1'
31
+ s.add_runtime_dependency 'thor', '~> 0.19.4'
32
+ s.add_runtime_dependency 'oga', '~> 2.2'
33
+ s.add_runtime_dependency 'serrano', '~> 0.3.6'
34
+ s.add_runtime_dependency 'bibtex-ruby', '~> 4.4'
35
+ s.add_runtime_dependency 'pdf-reader', '~> 2.0'
36
+ end
data/extra/fetch.rb ADDED
@@ -0,0 +1,49 @@
1
+ module Textminer
2
+ class Fetch #:nodoc:
3
+ attr_accessor :doi, :type
4
+
5
+ def initialize(doi, type)
6
+ self.doi = doi
7
+ self.type = type
8
+ end
9
+
10
+ def fetchtext
11
+ lks = Textminer.links(self.doi)
12
+ lk = pick_link(lks)
13
+ case self.type
14
+ when "xml"
15
+ # HTTParty.get(lk)
16
+ coll = []
17
+ Array(lk).each do |x|
18
+ coll << HTTParty.get(x)
19
+ end
20
+ return coll
21
+ when "pdf"
22
+ serialize_pdf(lk, self.doi)
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ def pick_link(x)
29
+ case self.type
30
+ when "xml"
31
+ x.xml
32
+ when "pdf"
33
+ x.pdf
34
+ else
35
+ puts "type must be xml or pdf"
36
+ end
37
+ end
38
+
39
+ def serialize_pdf(x, y)
40
+ path = "/Users/sacmac/.textminer/" + y.gsub('/', '_') + ".pdf"
41
+ File.open(path, "wb") do |f|
42
+ f.write HTTParty.get(x).parsed_response
43
+ end
44
+
45
+ return path
46
+ end
47
+
48
+ end
49
+ end
@@ -0,0 +1,17 @@
1
+ ##
2
+ # Fetch full text for a DOI; thin wrapper around Fetch#fetchtext
3
+ #
4
+ # @param doi [String, Array<String>] A DOI (digital object identifier), or an array of DOIs
5
+ # @param type [String] One of two options to download: xml (default) or pdf
6
+ #
7
+ # @example
8
+ # require 'textminer'
9
+ # # fetch full text by DOI - xml by default
10
+ # Textminer.fetch("10.3897/phytokeys.42.7604")
11
+ # # many DOIs - xml output
12
+ # res = Textminer.fetch(["10.3897/phytokeys.42.7604", "10.3897/zookeys.516.9439"])
13
+ # # fetch full text - pdf
14
+ # Textminer.fetch("10.3897/phytokeys.42.7604", "pdf")
15
+ def self.fetch(doi, type = 'xml')
16
+ Fetch.new(doi, type).fetchtext
17
+ end
@@ -0,0 +1,8 @@
1
+ # Array methods
2
+ class Array
3
+ def write_bib(file)
4
+ File.open(file, 'a') do |f|
5
+ f.puts self
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,32 @@
1
+ require 'oga'
2
+ require 'bibtex'
3
+
4
+ # String methods
5
+ class String
6
+ def write_bib(file)
7
+ File.open(file, 'a') do |f|
8
+ f.puts self
9
+ end
10
+ end
11
+ end
12
+
13
+ class String
14
+ def make_bib_arxiv(id)
15
+ # prep xml
16
+ xml = Oga.parse_xml(self)
17
+ # author = xml.xpath('//author//name')[0].text.downcase.gsub(/\s|\./, '_')
18
+ year = DateTime.strptime(xml.xpath('//updated')[0].text).year
19
+
20
+ # make bib citation
21
+ bib = BibTeX::Bibliography.new
22
+ bib << BibTeX::Entry.new({
23
+ :bibtex_type => :article,
24
+ :url => xml.xpath('//entry/id').text,
25
+ :author => xml.xpath('//author//name').collect { |x| x.text }.join(' and '),
26
+ :eprint => id,
27
+ :title => xml.xpath('//entry//title').text,
28
+ :year => year
29
+ })
30
+ return bib.to_s
31
+ end
32
+ end
@@ -0,0 +1,47 @@
1
+ require "oga"
2
+
3
+ def singlearray2hash(x)
4
+ if x.length == 1 && x.class == Array
5
+ return x[0]
6
+ else
7
+ return x
8
+ end
9
+ end
10
+
11
+ def dir_files(x)
12
+ Dir.entries(x).select { |entry|
13
+ !File.directory? File.join(x, entry) and !(entry =='.' || entry == '..')
14
+ }.map { |z|
15
+ x + '/' + z
16
+ }
17
+ end
18
+
19
+ def make_paths(x)
20
+ path = Array(x)
21
+ if path.length == 1
22
+ # if a directory
23
+ if File.directory?(path[0])
24
+ # keep only files with .pdf extension
25
+ path = dir_files(path[0]).keep_if { |z| !!z.match(/.pdf/) }
26
+ end
27
+ end
28
+
29
+ # check that files exist
30
+ path.each do |z|
31
+ if !File.exist?(z)
32
+ raise z + ' not found'
33
+ end
34
+ end
35
+
36
+ return path
37
+ end
38
+
39
+ def pdf_doi(x)
40
+ xml = Oga.parse_xml(x)
41
+ begin
42
+ tt = xml.xpath('//rdf:Description')
43
+ return tt.attr('dc:identifier')[0].text.sub(/doi:/, '')
44
+ rescue
45
+ return nil
46
+ end
47
+ end
@@ -0,0 +1,3 @@
1
+ module Extcite
2
+ VERSION = "0.1.0"
3
+ end
data/lib/extcite.rb ADDED
@@ -0,0 +1,224 @@
1
+ require "extcite/utils"
2
+ require "extcite/methods_array"
3
+ require "extcite/methods_string"
4
+ require "extcite/version"
5
+
6
+
7
+ require 'serrano'
8
+ require 'pdf-reader'
9
+ require 'faraday'
10
+
11
+
12
+
13
+ module Extcite
14
+ ##
15
+ # Extract DOIs from one or more PDFs
16
+ #
17
+ # @param path [String] Path to a pdf file, or a folder of PDF files
18
+ # @param file [String] File name to write data to - or nil to stdout
19
+ # @param output [String] Type of output. Only bibtex for now
20
+ #
21
+ # Return: writes bib entries to a .bib file, or returns an array if file is nil
22
+ # When writing to a file, `extract` by default appends to the end
23
+ # of the file so you can build up your bibtex file with your
24
+ # citations
25
+ #
26
+ # @example
27
+ # require 'extcite'
28
+ # require 'faraday'
29
+ # # get a paper in pdf format
30
+ # path = '2068.pdf'
31
+ # res = Faraday.new(:url => "https://peerj.com/articles/2068.pdf").get
32
+ # f = File.new(path, "wb")
33
+ # f.write(res.body)
34
+ # f.close()
35
+ # # extract doi from the pdf
36
+ # Extcite.extract(path: path)
37
+ # Extcite.extract(path: path, file: nil)
38
+ def self.extract(path:, file: "out.bib", output: "bib")
39
+ path = make_paths(path)
40
+ path.each do |x|
41
+ # try PDF metadata first
42
+ ids = nil
43
+ rr = PDF::Reader.new(x)
44
+ pdfmeta = rr.metadata
45
+ if !pdfmeta.nil?
46
+ xml = Oga.parse_xml(pdfmeta)
47
+ begin
48
+ tt = xml.xpath('//rdf:Description')
49
+ # try dc:identifier attribute
50
+ ss = tt.attr('dc:identifier')[0]
51
+ if !ss.nil?
52
+ ids = ss.text.sub(/doi:/, '')
53
+ else
54
+ # try prism:doi node
55
+ pdoi = xml.xpath('//rdf:Description//prism:doi')
56
+ if pdoi.length == 1
57
+ ids = pdoi.text
58
+ else
59
+ # try pdf:WPS-ARTICLEDOI node
60
+ wpsdoi = xml.xpath('//rdf:Description//pdf:WPS-ARTICLEDOI')
61
+ if wpsdoi.length == 1
62
+ ids = wpsdoi.text
63
+ else
64
+ # try pdfx:WPS-ARTICLEDOI node
65
+ pdfxwpsdoi = xml.xpath('//rdf:Description//pdfx:WPS-ARTICLEDOI')
66
+ if pdfxwpsdoi.length == 1
67
+ ids = pdfxwpsdoi.text
68
+ else
69
+ ids = nil
70
+ end
71
+ end
72
+ end
73
+ end
74
+ rescue
75
+ ids = nil
76
+ end
77
+ end
78
+
79
+ # if not found, try regexing for DOI
80
+ if ids.nil?
81
+ ids = Extcite.get_ids(txt: Extcite.extract_text_one(x))
82
+ end
83
+
84
+ if ids.length == 0
85
+ puts "no DOI found in " + x
86
+ else
87
+ if !ids.match(/arxiv/i).nil? && ids.length < 200
88
+ conn = Faraday.new(:url => 'http://export.arxiv.org/api/query?id_list=' + ids.gsub(/arxiv:/i, '')).get
89
+ bibs = conn.body.make_bib_arxiv(ids.gsub(/arxiv:/i, ''))
90
+ else
91
+ bibs = Extcite.cont_neg(ids: ids)
92
+ end
93
+
94
+ # if an error or not found, skip
95
+ bibstest = nil
96
+ if bibs.class == Array
97
+ bibstest = bibs[0]
98
+ end
99
+
100
+ if !bibstest.nil?
101
+ if !bibstest.match(/error|not found/i).nil? || !bibstest.match(/<\/html>/i).nil?
102
+ puts "DOI found: " + ids + " ; but citation not found via content negotation - passing"
103
+ # do something else?
104
+ else
105
+ if file.nil?
106
+ return bibs
107
+ else
108
+ puts "writing " + ids + " to " + file
109
+ bibs.write_bib(file)
110
+ end
111
+ end
112
+ end
113
+ end
114
+ end
115
+ end
116
+
117
+ ##
118
+ # Extract DOIs from one or more PDFs after extracting text
119
+ #
120
+ # @param path [String] Path to a pdf file, or a folder of PDF files
121
+ #
122
+ # @example
123
+ # require 'extcite'
124
+ # require 'faraday'
125
+ # # get a paper in pdf format
126
+ # path = '2068.pdf'
127
+ # res = Faraday.new(:url => "https://peerj.com/articles/2068.pdf").get
128
+ # f = File.new(path, "wb")
129
+ # f.write(res.body)
130
+ # f.close()
131
+ # # extract doi from the pdf
132
+ # Extcite.extract_dois(path: path)
133
+ def self.extract_dois(path:)
134
+ txt = Extcite.extract_text(path: path)
135
+ return txt.map { |z| z.match("[0-9]+\\.[0-9]+/.+").to_s.gsub(/\s.+/, '') }
136
+ end
137
+
138
+ ##
139
+ # Get a DOI or arXiv id from a String or Array of Strings
140
+ #
141
+ # @param txt [String, Array<String>] String or Array of Strings
142
+ #
143
+ # Return: a String - the DOI or arXiv id found in the first element (empty if none)
144
+ #
145
+ # @example
146
+ # require 'extcite'
147
+ # Extcite.get_ids(txt: '10.1016/j.dendro.2014.01.004 adfasdf asd fas df asdfsd')
148
+ def self.get_ids(txt:)
149
+ # look for an arXiv id first; otherwise fall back to the DOI pattern
150
+
151
+ return Array(txt).map { |z|
152
+ # detect if is an arxiv paper
153
+ if !z.match(/arxiv:[0-9]+\.[0-9A-Za-z]+/i).nil?
154
+ # if so, return arxiv id for later extraction of arxiv citation via their API
155
+ z = z.match(/arxiv:[0-9]+\.[0-9A-Za-z]+/i).to_s
156
+ else
157
+ doi_pattern = '(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%"#? ])\\S)+)'
158
+ z = z.match(doi_pattern).to_s.gsub(/\s.+/, '')
159
+ # z = z.match("10\\.[0-9]+/.+").to_s.gsub(/\s.+/, '')
160
+ end
161
+ # clean up doi
162
+ z = z.gsub(/\.$|\.;$|\.\]$|\.\}$|\.\)$|,$/, '')
163
+ return z.gsub(/;$|\]$|\}$|\)$/, '')
164
+ }[0]
165
+ end
166
+
167
+ ##
168
+ # Extract text from a pdf, or many pdfs
169
+ #
170
+ # @param path [String] Path to a pdf file, or a folder of PDF files
171
+ #
172
+ # This method is used internally by extract_dois to parse PDFs.
173
+ #
174
+ # @example
175
+ # require 'extcite'
176
+ # require 'faraday'
177
+ # # get a paper in pdf format
178
+ # path = '2068.pdf'
179
+ # res = Faraday.new(:url => "https://peerj.com/articles/2068.pdf").get
180
+ # f = File.new(path, "wb")
181
+ # f.write(res.body)
182
+ # f.close()
183
+ # # extract text from the pdf
184
+ # Extcite.extract_text(path: path)
185
+ def self.extract_text(path:)
186
+ path = Array(path)
187
+ if path.length == 1
188
+ if File.directory?(path[0])
189
+ # keep only files with .pdf extension
190
+ path = dir_files(path[0]).keep_if { |z| !!z.match(/.pdf/) }
191
+ end
192
+ end
193
+
194
+ out = []
195
+ path.each do |x|
196
+ rr = PDF::Reader.new(x)
197
+ out << rr.pages.map { |page| page.text }.join("\n")
198
+ end
199
+ return out
200
+ end
201
+
202
+ ##
203
+ # Get citation(s) using Crossref content negotiation
204
+ #
205
+ # @param ids [Array[String]] One or more DOIs in an array
206
+ #
207
+ # Return: an array of bib data
208
+ #
209
+ # @example
210
+ # require 'extcite'
211
+ # Extcite.cont_neg(ids: "10.1016/j.dendro.2014.01.004")
212
+ def self.cont_neg(ids:)
213
+ out = Serrano.content_negotiation(ids: ids)
214
+ return out
215
+ end
216
+
217
+ protected
218
+
219
+ def self.extract_text_one(x)
220
+ rr = PDF::Reader.new(x)
221
+ return rr.pages.map { |page| page.text }.join("\n")
222
+ end
223
+
224
+ end
metadata ADDED
@@ -0,0 +1,246 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: extcite
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Scott Chamberlain
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-04-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.14'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.14.6
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '1.14'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.14.6
33
+ - !ruby/object:Gem::Dependency
34
+ name: rake
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '12.0'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 12.0.0
43
+ type: :development
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: '12.0'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 12.0.0
53
+ - !ruby/object:Gem::Dependency
54
+ name: test-unit
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: '3.2'
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 3.2.1
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '3.2'
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 3.2.1
73
+ - !ruby/object:Gem::Dependency
74
+ name: simplecov
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 0.14.1
80
+ type: :development
81
+ prerelease: false
82
+ version_requirements: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - "~>"
85
+ - !ruby/object:Gem::Version
86
+ version: 0.14.1
87
+ - !ruby/object:Gem::Dependency
88
+ name: codecov
89
+ requirement: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - "~>"
92
+ - !ruby/object:Gem::Version
93
+ version: 0.1.10
94
+ type: :development
95
+ prerelease: false
96
+ version_requirements: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - "~>"
99
+ - !ruby/object:Gem::Version
100
+ version: 0.1.10
101
+ - !ruby/object:Gem::Dependency
102
+ name: faraday
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: 0.12.0.1
108
+ type: :runtime
109
+ prerelease: false
110
+ version_requirements: !ruby/object:Gem::Requirement
111
+ requirements:
112
+ - - "~>"
113
+ - !ruby/object:Gem::Version
114
+ version: 0.12.0.1
115
+ - !ruby/object:Gem::Dependency
116
+ name: faraday_middleware
117
+ requirement: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - "~>"
120
+ - !ruby/object:Gem::Version
121
+ version: 0.11.0.1
122
+ type: :runtime
123
+ prerelease: false
124
+ version_requirements: !ruby/object:Gem::Requirement
125
+ requirements:
126
+ - - "~>"
127
+ - !ruby/object:Gem::Version
128
+ version: 0.11.0.1
129
+ - !ruby/object:Gem::Dependency
130
+ name: thor
131
+ requirement: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - "~>"
134
+ - !ruby/object:Gem::Version
135
+ version: 0.19.4
136
+ type: :runtime
137
+ prerelease: false
138
+ version_requirements: !ruby/object:Gem::Requirement
139
+ requirements:
140
+ - - "~>"
141
+ - !ruby/object:Gem::Version
142
+ version: 0.19.4
143
+ - !ruby/object:Gem::Dependency
144
+ name: oga
145
+ requirement: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - "~>"
148
+ - !ruby/object:Gem::Version
149
+ version: '2.2'
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ requirements:
154
+ - - "~>"
155
+ - !ruby/object:Gem::Version
156
+ version: '2.2'
157
+ - !ruby/object:Gem::Dependency
158
+ name: serrano
159
+ requirement: !ruby/object:Gem::Requirement
160
+ requirements:
161
+ - - "~>"
162
+ - !ruby/object:Gem::Version
163
+ version: 0.3.6
164
+ type: :runtime
165
+ prerelease: false
166
+ version_requirements: !ruby/object:Gem::Requirement
167
+ requirements:
168
+ - - "~>"
169
+ - !ruby/object:Gem::Version
170
+ version: 0.3.6
171
+ - !ruby/object:Gem::Dependency
172
+ name: bibtex-ruby
173
+ requirement: !ruby/object:Gem::Requirement
174
+ requirements:
175
+ - - "~>"
176
+ - !ruby/object:Gem::Version
177
+ version: '4.4'
178
+ type: :runtime
179
+ prerelease: false
180
+ version_requirements: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - "~>"
183
+ - !ruby/object:Gem::Version
184
+ version: '4.4'
185
+ - !ruby/object:Gem::Dependency
186
+ name: pdf-reader
187
+ requirement: !ruby/object:Gem::Requirement
188
+ requirements:
189
+ - - "~>"
190
+ - !ruby/object:Gem::Version
191
+ version: '2.0'
192
+ type: :runtime
193
+ prerelease: false
194
+ version_requirements: !ruby/object:Gem::Requirement
195
+ requirements:
196
+ - - "~>"
197
+ - !ruby/object:Gem::Version
198
+ version: '2.0'
199
+ description: Gets DOIS and generates citations for your papers
200
+ email: myrmecocystus@gmail.com
201
+ executables:
202
+ - extcite
203
+ extensions: []
204
+ extra_rdoc_files: []
205
+ files:
206
+ - ".gitignore"
207
+ - ".travis.yml"
208
+ - CHANGELOG.md
209
+ - Gemfile
210
+ - Gemfile.lock
211
+ - README.md
212
+ - Rakefile
213
+ - bin/extcite
214
+ - extcite.gemspec
215
+ - extra/fetch.rb
216
+ - extra/fetch_method.rb
217
+ - lib/extcite.rb
218
+ - lib/extcite/methods_array.rb
219
+ - lib/extcite/methods_string.rb
220
+ - lib/extcite/utils.rb
221
+ - lib/extcite/version.rb
222
+ homepage: http://github.com/sckott/extcite
223
+ licenses:
224
+ - MIT
225
+ metadata: {}
226
+ post_install_message:
227
+ rdoc_options: []
228
+ require_paths:
229
+ - lib
230
+ required_ruby_version: !ruby/object:Gem::Requirement
231
+ requirements:
232
+ - - ">="
233
+ - !ruby/object:Gem::Version
234
+ version: '0'
235
+ required_rubygems_version: !ruby/object:Gem::Requirement
236
+ requirements:
237
+ - - ">="
238
+ - !ruby/object:Gem::Version
239
+ version: '0'
240
+ requirements: []
241
+ rubyforge_project:
242
+ rubygems_version: 2.6.8
243
+ signing_key:
244
+ specification_version: 4
245
+ summary: Citations from PDFs
246
+ test_files: []