risbn 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/README.rdoc +25 -2
  2. data/VERSION +1 -1
  3. data/lib/risbn.rb +1 -1
  4. data/lib/risbn/scanner.rb +55 -0
  5. metadata +2 -1
data/README.rdoc CHANGED
@@ -1,9 +1,32 @@
1
1
  = risbn
2
2
 
3
- Description goes here.
3
+ Minimal set of tools for working with isbns from ruby.
4
+
5
+ Supports both isbn-10 and isbn-13.
6
+
7
+ Provides a simple (barebones) tool for extracting isbns from pdf and chm files.
8
+
9
+ == Examples:
10
+
11
+ isbn = RISBN.parse_first("Some text with an isbn: ISBN-13: 978-0393317732") # => <RISBN isbn="9780393317732">
12
+ isbn.valid? # => true
13
+
14
+ require 'risbn/scanner'
15
+ RISBN::Scanner.scan("some/file.pdf")
16
+ RISBN::Scanner.scan("some/file.chm")
17
+ RISBN::Scanner.scan("some/file.txt")
18
+
19
+ == Notes
20
+
21
+ Currently only works on Unix-like platforms.
22
+
23
+ Requires the following tools for scanning files:
24
+
25
+ * Poppler for pdf (pdftotext utility)
26
+ * Archmage for chm
4
27
 
5
28
  == Note on Patches/Pull Requests
6
-
29
+
7
30
  * Fork the project.
8
31
  * Make your feature addition or bug fix.
9
32
  * Add tests for it. This is important so I don't break it in a
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
data/lib/risbn.rb CHANGED
@@ -17,7 +17,7 @@ class RISBN
17
17
 
18
18
  # Provide a string with the isbn. Any character other than a digit or X will be removed.
19
19
  def initialize(code = "")
20
- @isbn = code.to_s.upcase.gsub(/[^0-9X]/, "")
20
+ @isbn = (code || "").to_s.upcase.gsub(/[^0-9X]/, "")
21
21
  end
22
22
 
23
23
  def valid?
@@ -0,0 +1,55 @@
1
+ require 'shellwords'
2
+ require 'tmpdir'
3
+ require 'iconv'
4
+
5
+ class RISBN
6
+
7
+ # Scan a file for an isbn. Currently only text files, pdf and chm files are allowed.
8
+ # Uses unix 'file' command to identify the file.
9
+ # For pdf scanning uses poppler, for chm scanning uses archmage.
10
+ module Scanner
11
+ extend self
12
+
13
+ # provide a file path of a file to scan for the first found isbn.
14
+ # currently scans pdfs using poppler, chm using archmage and text files.
15
+ # Also, requires the unix utility "file"
16
+ def scan(path)
17
+ case identify(path)
18
+ when /PDF/ then scan_pdf(path)
19
+ when /HtmlHelp/ then scan_chm(path)
20
+ when /text/ then scan_txt(path)
21
+ end || RISBN.new
22
+ end
23
+
24
+ def identify(path)
25
+ File.file?(path) ? %x|file -F :::: #{path.to_s.shellescape}|.split("::::").last.strip : ""
26
+ end
27
+
28
+ def scan_chm(path)
29
+ Dir.mktmpdir do |dir|
30
+ tmp = File.join(dir, "tempfile.txt")
31
+ system("python -W ignore $(which archmage) -c text #{ path.to_s.shellescape } #{ tmp.to_s.shellescape } 2>&1 > /dev/null")
32
+ scan_txt(tmp)
33
+ end
34
+ end
35
+
36
+ def scan_pdf(path)
37
+ Dir.mktmpdir do |dir|
38
+ tmp = File.join(dir, "tempfile.txt")
39
+ system("pdftotext -q -f 0 -l 20 -raw -nopgbrk #{ path.to_s.shellescape } #{ tmp.to_s.shellescape }")
40
+ scan_txt(tmp)
41
+ end
42
+ end
43
+
44
+ def scan_txt(path)
45
+ IO.foreach(path) do |line|
46
+ isbn = RISBN.parse_first(line)
47
+ return isbn if isbn.valid?
48
+ end
49
+ nil
50
+ rescue # any problem with the text encoding
51
+ nil
52
+ end
53
+
54
+ end
55
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: risbn
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emmanuel Oga
@@ -39,6 +39,7 @@ files:
39
39
  - Rakefile
40
40
  - VERSION
41
41
  - lib/risbn.rb
42
+ - lib/risbn/scanner.rb
42
43
  - spec/risbn_spec.rb
43
44
  - spec/spec_helper.rb
44
45
  has_rdoc: true