filerary 0.0.1 → 0.0.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e0e03b2bc1bcc04674c5c134c54fbcd6385cda9d
4
- data.tar.gz: ca7cb1b5ebfb1ee1b3cafbf77dbb6f11ebff18c1
3
+ metadata.gz: 6cb82ae7cf6e47684a98d9fd942ed055f624b9cf
4
+ data.tar.gz: 827a84c3b79c5e50300cc814edcd849b927b047f
5
5
  SHA512:
6
- metadata.gz: 3860577c8ba2591d8b506dd22e5aa668f255f006c7e7e99541e916a4b0bb4fcc68d8768646da7ee4c90a980b85f8c6db74700fb29eccab40c256bb3ae71c494b
7
- data.tar.gz: a5a6bc5c8b8fbbfbb274e62887a29f7ca825a505533d4bf3917ec8a4c2427d914aaa4f71c9685d6ccdd9209fcdd613ab55d9120a8a2b5bdecc5cc0a780df4e9c
6
+ metadata.gz: 0822bb86cbe996ef71a856bd54a3ca6fb504527b4fa3e818ccaf4bc79edc04a235da33cb959246ed9961bd341040ccfea7be3c3922f90e62c7061f2fc256d4c1
7
+ data.tar.gz: a7ae84144ba9abda1bfd1ce4c8826aa5f44bd1da9ed8914d7692d7349b46253fbf55b844d889ae1f7fba00ec808dd3db8ec067bd3f125ab2c842390296eb3151
@@ -1,6 +1,9 @@
1
1
  require "fileutils"
2
+ require "uri"
2
3
  require "grn_mini"
3
4
  require "chupa-text"
5
+ gem "chupa-text-decomposer-pdf"
6
+ gem "chupa-text-decomposer-libreoffice"
4
7
 
5
8
  module Filerary
6
9
  class Librarian
@@ -60,14 +63,20 @@ module Filerary
60
63
  extractor = ChupaText::Extractor.new
61
64
  extractor.apply_configuration(ChupaText::Configuration.default)
62
65
 
63
- extractor.extract(path) do |text_data|
64
- text = text_data.body
66
+ begin
67
+ extractor.extract(URI.encode(path)) do |text_data|
68
+ text = text_data.body
69
+ end
70
+ rescue URI::InvalidURIError
71
+ return path
65
72
  end
66
73
 
67
- # TODO: I want to specify encoding in ChupaText side.
68
- text.force_encoding(Encoding.default_external || "UTF-8")
74
+ return path unless text
69
75
 
70
- text
76
+ # TODO: I want to specify encoding in ChupaText side.
77
+ text.force_encoding("UTF-8")
78
+ return text if text.valid_encoding?
79
+ text.force_encoding(Encoding.default_external)
71
80
  end
72
81
  end
73
82
  end
@@ -1,3 +1,3 @@
1
1
  module Filerary
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filerary
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Masafumi Yokoyama