risbn 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +25 -2
- data/VERSION +1 -1
- data/lib/risbn.rb +1 -1
- data/lib/risbn/scanner.rb +55 -0
- metadata +2 -1
data/README.rdoc
CHANGED
@@ -1,9 +1,32 @@
|
|
1
1
|
= risbn
|
2
2
|
|
3
|
-
|
3
|
+
Minimal set of tools for working with isbns from ruby.
|
4
|
+
|
5
|
+
Supports both isbn-10 and isbn-13.
|
6
|
+
|
7
|
+
Provides a simple (barebones) tool for extracting isbns from pdf and chm files.
|
8
|
+
|
9
|
+
== Examples:
|
10
|
+
|
11
|
+
isbn = RISBN.parse_first("Some text with an isbn: ISBN-13: 978-0393317732") # => <RISBN isbn="9780393317732">
|
12
|
+
isbn.valid? # => true
|
13
|
+
|
14
|
+
require 'risbn/scanner'
|
15
|
+
RISBN::Scanner.scan("some/file.pdf")
|
16
|
+
RISBN::Scanner.scan("some/file.chm")
|
17
|
+
RISBN::Scanner.scan("some/file.txt")
|
18
|
+
|
19
|
+
== Notes
|
20
|
+
|
21
|
+
Currently only works on Unix-like platforms.
|
22
|
+
|
23
|
+
Requires the following tools for scanning files:
|
24
|
+
|
25
|
+
* Poppler for pdf (pdftotext utility)
|
26
|
+
* Archmage for chm
|
4
27
|
|
5
28
|
== Note on Patches/Pull Requests
|
6
|
-
|
29
|
+
|
7
30
|
* Fork the project.
|
8
31
|
* Make your feature addition or bug fix.
|
9
32
|
* Add tests for it. This is important so I don't break it in a
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/lib/risbn.rb
CHANGED
data/lib/risbn/scanner.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'shellwords'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'iconv'
|
4
|
+
|
5
|
+
class RISBN

  # Scans a file for an ISBN. Only plain-text, PDF and CHM files are handled.
  # The file type is detected with the unix `file` command; PDFs are converted
  # to text with poppler's `pdftotext` and CHM files with archmage.
  module Scanner
    extend self

    # Scan the file at +path+ for the first valid ISBN.
    #
    # path - path of the file to scan (anything responding to #to_s that
    #        File.file? accepts).
    #
    # Returns the first valid ISBN found, or a RISBN built with no arguments
    # when the file type is not recognised or no valid ISBN was found.
    def scan(path)
      found = case identify(path)
              when /PDF/      then scan_pdf(path)
              when /HtmlHelp/ then scan_chm(path)
              when /text/     then scan_txt(path)
              end
      found || RISBN.new
    end

    # Ask the unix `file` utility what kind of file +path+ is.
    #
    # Returns the description printed by `file` (the text after the "::::"
    # separator), or "" when +path+ is not a regular file or `file` printed
    # nothing.
    def identify(path)
      return "" unless File.file?(path)

      # "--" ends option parsing so a file name starting with "-" is not
      # mistaken for a command line switch.
      output = %x|file -F :::: -- #{path.to_s.shellescape}|

      # `last` is nil when the command produced no output at all.
      output.split("::::").last.to_s.strip
    end

    # Convert a CHM file to text with archmage and scan the result.
    #
    # Returns the first valid ISBN found, or nil.
    def scan_chm(path)
      Dir.mktmpdir do |dir|
        tmp = File.join(dir, "tempfile.txt")
        # stdout must be redirected before "2>&1", otherwise stderr keeps
        # pointing at the original stdout and the tool's noise leaks through.
        system("python -W ignore $(which archmage) -c text #{path.to_s.shellescape} #{tmp.shellescape} > /dev/null 2>&1")
        scan_txt(tmp)
      end
    end

    # Convert the leading pages of a PDF (pdftotext is asked for pages up to
    # 20) to text and scan the result.
    #
    # Returns the first valid ISBN found, or nil.
    def scan_pdf(path)
      Dir.mktmpdir do |dir|
        tmp = File.join(dir, "tempfile.txt")
        # Multi-argument form: no shell is involved, so no escaping is needed.
        system("pdftotext", "-q", "-f", "0", "-l", "20", "-raw", "-nopgbrk", path.to_s, tmp)
        scan_txt(tmp)
      end
    end

    # Scan a text file line by line and stop at the first valid ISBN.
    #
    # Returns the first valid ISBN found, or nil when there is none or the
    # file cannot be read / decoded.
    def scan_txt(path)
      # File.foreach (unlike IO.foreach) never treats a leading "|" in the
      # path as a command to run.
      File.foreach(path) do |line|
        isbn = RISBN.parse_first(line)
        # NOTE(review): parse_first is defined elsewhere; guard against a nil
        # result so a line without an ISBN cannot abort the whole scan.
        return isbn if isbn && isbn.valid?
      end
      nil
    rescue StandardError # best effort: missing file or any problem with the text encoding
      nil
    end

  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: risbn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emmanuel Oga
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- Rakefile
|
40
40
|
- VERSION
|
41
41
|
- lib/risbn.rb
|
42
|
+
- lib/risbn/scanner.rb
|
42
43
|
- spec/risbn_spec.rb
|
43
44
|
- spec/spec_helper.rb
|
44
45
|
has_rdoc: true
|