risbn 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +25 -2
- data/VERSION +1 -1
- data/lib/risbn.rb +1 -1
- data/lib/risbn/scanner.rb +55 -0
- metadata +2 -1
data/README.rdoc
CHANGED
@@ -1,9 +1,32 @@
|
|
1
1
|
= risbn
|
2
2
|
|
3
|
-
|
3
|
+
Minimal set of tools for working with isbns from ruby.
|
4
|
+
|
5
|
+
Supports both isbn-10 and isbn-13.
|
6
|
+
|
7
|
+
Provides a simple (barebones) tool for extracting isbns from pdf and chm files.
|
8
|
+
|
9
|
+
== Examples:
|
10
|
+
|
11
|
+
isbn = RISBN.parse_first("Some text with an isbn: ISBN-13: 978-0393317732") # => <RISBN isbn="9780393317732">
|
12
|
+
isbn.valid? # => true
|
13
|
+
|
14
|
+
require 'risbn/scanner'
|
15
|
+
RISBN::Scanner.scan("some/file.pdf")
|
16
|
+
RISBN::Scanner.scan("some/file.chm")
|
17
|
+
RISBN::Scanner.scan("some/file.txt")
|
18
|
+
|
19
|
+
== Notes
|
20
|
+
|
21
|
+
Currently only works on Unix-like platforms.
|
22
|
+
|
23
|
+
Requires the following tools for scanning files:
|
24
|
+
|
25
|
+
* Poppler for pdf (pdftotext utility)
|
26
|
+
* Archmage for chm
|
4
27
|
|
5
28
|
== Note on Patches/Pull Requests
|
6
|
-
|
29
|
+
|
7
30
|
* Fork the project.
|
8
31
|
* Make your feature addition or bug fix.
|
9
32
|
* Add tests for it. This is important so I don't break it in a
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.0
|
1
|
+
0.2.0
|
data/lib/risbn/scanner.rb
CHANGED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'shellwords'
|
2
|
+
require 'tmpdir'
|
3
|
+
require 'iconv'
|
4
|
+
|
5
|
+
class RISBN

  # Scans a file for an ISBN. Only plain-text, PDF and CHM files are handled.
  #
  # The file type is detected with the unix `file` command. PDF files are
  # converted to text with poppler's `pdftotext`; CHM files are converted with
  # `archmage`. Both external tools must be installed for those formats.
  module Scanner
    extend self

    # Scans the file at +path+ for the first valid ISBN.
    #
    # path - a file path (anything whose #to_s is the path).
    #
    # Returns the RISBN found by the matching scan_* method, or a fresh
    # RISBN.new when the file type is not recognised or no ISBN was found.
    def scan(path)
      found =
        case identify(path)
        when /PDF/      then scan_pdf(path)
        when /HtmlHelp/ then scan_chm(path)
        when /text/     then scan_txt(path)
        end

      found || RISBN.new
    end

    # Describes the file at +path+ using the unix `file` command.
    #
    # Returns the description printed by `file` (the text after the "::::"
    # separator), or "" when +path+ is not a regular file or `file` printed
    # nothing.
    def identify(path)
      return "" unless File.file?(path)

      # -F sets an unusual separator so the description can be split off the
      # file name reliably, even when the name itself contains a colon.
      output = %x|file -F :::: #{path.to_s.shellescape}|

      # to_s guards against empty output, where split returns [] and last is nil.
      output.split("::::").last.to_s.strip
    end

    # Converts the CHM file at +path+ to text with archmage and scans the result.
    #
    # Returns the first valid RISBN found, or nil.
    def scan_chm(path)
      Dir.mktmpdir do |dir|
        tmp = File.join(dir, "tempfile.txt")
        # stdout must be redirected before stderr is duplicated onto it;
        # "2>&1 > /dev/null" would leave stderr attached to the terminal.
        system("python -W ignore $(which archmage) -c text #{path.to_s.shellescape} #{tmp.to_s.shellescape} > /dev/null 2>&1")
        # If the conversion failed the temp file is missing and scan_txt yields nil.
        scan_txt(tmp)
      end
    end

    # Converts the first 20 pages of the PDF at +path+ to text with pdftotext
    # and scans the result.
    #
    # Returns the first valid RISBN found, or nil.
    def scan_pdf(path)
      Dir.mktmpdir do |dir|
        tmp = File.join(dir, "tempfile.txt")
        # pdftotext numbers pages from 1, so the range is pages 1..20.
        system("pdftotext -q -f 1 -l 20 -raw -nopgbrk #{path.to_s.shellescape} #{tmp.to_s.shellescape}")
        # If the conversion failed the temp file is missing and scan_txt yields nil.
        scan_txt(tmp)
      end
    end

    # Reads the text file at +path+ line by line and parses each line with
    # RISBN.parse_first.
    #
    # Returns the first parsed RISBN that reports valid?, or nil when no line
    # yields one. Any StandardError raised while reading or parsing (missing
    # file, text-encoding problems, ...) is deliberately swallowed and also
    # results in nil: scanning is best-effort.
    def scan_txt(path)
      IO.foreach(path) do |line|
        isbn = RISBN.parse_first(line)
        return isbn if isbn.valid?
      end
      nil
    rescue StandardError
      nil
    end

  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: risbn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Emmanuel Oga
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- Rakefile
|
40
40
|
- VERSION
|
41
41
|
- lib/risbn.rb
|
42
|
+
- lib/risbn/scanner.rb
|
42
43
|
- spec/risbn_spec.rb
|
43
44
|
- spec/spec_helper.rb
|
44
45
|
has_rdoc: true
|