filerary 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/filerary/librarian.rb +14 -5
- data/lib/filerary/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6cb82ae7cf6e47684a98d9fd942ed055f624b9cf
|
4
|
+
data.tar.gz: 827a84c3b79c5e50300cc814edcd849b927b047f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0822bb86cbe996ef71a856bd54a3ca6fb504527b4fa3e818ccaf4bc79edc04a235da33cb959246ed9961bd341040ccfea7be3c3922f90e62c7061f2fc256d4c1
|
7
|
+
data.tar.gz: a7ae84144ba9abda1bfd1ce4c8826aa5f44bd1da9ed8914d7692d7349b46253fbf55b844d889ae1f7fba00ec808dd3db8ec067bd3f125ab2c842390296eb3151
|
data/lib/filerary/librarian.rb
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
require "fileutils"
|
2
|
+
require "uri"
|
2
3
|
require "grn_mini"
|
3
4
|
require "chupa-text"
|
5
|
+
gem "chupa-text-decomposer-pdf"
|
6
|
+
gem "chupa-text-decomposer-libreoffice"
|
4
7
|
|
5
8
|
module Filerary
|
6
9
|
class Librarian
|
@@ -60,14 +63,20 @@ module Filerary
|
|
60
63
|
extractor = ChupaText::Extractor.new
|
61
64
|
extractor.apply_configuration(ChupaText::Configuration.default)
|
62
65
|
|
63
|
-
|
64
|
-
|
66
|
+
begin
|
67
|
+
extractor.extract(URI.encode(path)) do |text_data|
|
68
|
+
text = text_data.body
|
69
|
+
end
|
70
|
+
rescue URI::InvalidURIError
|
71
|
+
return path
|
65
72
|
end
|
66
73
|
|
67
|
-
|
68
|
-
text.force_encoding(Encoding.default_external || "UTF-8")
|
74
|
+
return path unless text
|
69
75
|
|
70
|
-
|
76
|
+
# TODO: I want to specify encoding in ChupaText side.
|
77
|
+
text.force_encoding("UTF-8")
|
78
|
+
return text if text.valid_encoding?
|
79
|
+
text.force_encoding(Encoding.default_external)
|
71
80
|
end
|
72
81
|
end
|
73
82
|
end
|
data/lib/filerary/version.rb
CHANGED