risbn 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. data/README.rdoc +25 -2
  2. data/VERSION +1 -1
  3. data/lib/risbn.rb +1 -1
  4. data/lib/risbn/scanner.rb +55 -0
  5. metadata +2 -1
data/README.rdoc CHANGED
@@ -1,9 +1,32 @@
1
1
  = risbn
2
2
 
3
- Description goes here.
3
+ Minimal set of tools for working with isbns from ruby.
4
+
5
+ Supports both isbn-10 and isbn-13.
6
+
7
+ Provides a simple (barebones) tool for extracting isbns from pdf and chm files.
8
+
9
+ == Examples:
10
+
11
+ isbn = RISBN.parse_first("Some text with an isbn: ISBN-13: 978-0393317732") # => <RISBN isbn="9780393317732">
12
+ isbn.valid? # => true
13
+
14
+ require 'risbn/scanner'
15
+ RISBN::Scanner.scan("some/file.pdf")
16
+ RISBN::Scanner.scan("some/file.chm")
17
+ RISBN::Scanner.scan("some/file.txt")
18
+
19
+ == Notes
20
+
21
+ Currently only works on Unix-like platforms.
22
+
23
+ Requires the following tools for scanning files:
24
+
25
+ * Poppler for pdf (pdftotext utility)
26
+ * Archmage for chm
4
27
 
5
28
  == Note on Patches/Pull Requests
6
-
29
+
7
30
  * Fork the project.
8
31
  * Make your feature addition or bug fix.
9
32
  * Add tests for it. This is important so I don't break it in a
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
data/lib/risbn.rb CHANGED
@@ -17,7 +17,7 @@ class RISBN
17
17
 
18
18
  # Provide a string with the isbn. Any character other than a digit or X will be removed.
19
19
  def initialize(code = "")
20
- @isbn = code.to_s.upcase.gsub(/[^0-9X]/, "")
20
+ @isbn = (code || "").to_s.upcase.gsub(/[^0-9X]/, "")
21
21
  end
22
22
 
23
23
  def valid?
@@ -0,0 +1,55 @@
1
+ require 'shellwords'
2
+ require 'tmpdir'
3
+ require 'iconv'
4
+
5
+ class RISBN
6
+
7
+ # Scan a file for an isbn. Currently only text, pdf and chm files are supported.
8
+ # Uses unix 'file' command to identify the file.
9
+ # For pdf scanning uses poppler, for chm scanning uses archmage.
10
+ module Scanner
11
+ extend self
12
+
13
+ # Provide the path of a file to scan for the first isbn found.
14
+ # currently scans pdfs using poppler, chm using archmage and text files.
15
+ # Also, requires the unix utility "file"
16
+ def scan(path)
17
+ case identify(path)
18
+ when /PDF/ then scan_pdf(path)
19
+ when /HtmlHelp/ then scan_chm(path)
20
+ when /text/ then scan_txt(path)
21
+ end || RISBN.new
22
+ end
23
+
24
+ def identify(path)
25
+ File.file?(path) ? %x|file -F :::: #{path.to_s.shellescape}|.split("::::").last.strip : ""
26
+ end
27
+
28
+ def scan_chm(path)
29
+ Dir.mktmpdir do |dir|
30
+ tmp = File.join(dir, "tempfile.txt")
31
+ system("python -W ignore $(which archmage) -c text #{ path.to_s.shellescape } #{ tmp.to_s.shellescape } 2>&1 > /dev/null")
32
+ scan_txt(tmp)
33
+ end
34
+ end
35
+
36
+ def scan_pdf(path)
37
+ Dir.mktmpdir do |dir|
38
+ tmp = File.join(dir, "tempfile.txt")
39
+ system("pdftotext -q -f 0 -l 20 -raw -nopgbrk #{ path.to_s.shellescape } #{ tmp.to_s.shellescape }")
40
+ scan_txt(tmp)
41
+ end
42
+ end
43
+
44
+ def scan_txt(path)
45
+ IO.foreach(path) do |line|
46
+ isbn = RISBN.parse_first(line)
47
+ return isbn if isbn.valid?
48
+ end
49
+ nil
50
+ rescue # any problem with the text encoding
51
+ nil
52
+ end
53
+
54
+ end
55
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: risbn
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emmanuel Oga
@@ -39,6 +39,7 @@ files:
39
39
  - Rakefile
40
40
  - VERSION
41
41
  - lib/risbn.rb
42
+ - lib/risbn/scanner.rb
42
43
  - spec/risbn_spec.rb
43
44
  - spec/spec_helper.rb
44
45
  has_rdoc: true