filerary 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5dada2e80e774e32a089e8a3797dbacb0275b4a6
|
4
|
+
data.tar.gz: ac63271136d1de8303d5a63d7cc6e8d69a9720f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 104bfc5442ee05c60abbc20112c5e09a5644058a55c3dc471c497e7c3c1a12dfb0ec9a042e688a23a97fbbb932718308e52ce10bdc033302d08b4aafbf5842cd
|
7
|
+
data.tar.gz: 9d66a9d851a1856a07e3d64af1561669fa3b6d70f1641a05b0cb13d8229248678ca9048b27db89e88ee9ce330d0b2efdf31c4447502a1e4e7bb8b1990e52695a
|
data/lib/filerary/librarian.rb
CHANGED
@@ -10,9 +10,14 @@ module URI
|
|
10
10
|
class Generic
|
11
11
|
alias :__path__ :path
|
12
12
|
def path
|
13
|
-
URI.
|
13
|
+
URI.decode_www_form_component(__path__, Encoding.find("locale"))
|
14
14
|
end
|
15
15
|
end
|
16
|
+
|
17
|
+
def self.parse(uri)
|
18
|
+
uri = URI.encode_www_form_component(uri)
|
19
|
+
DEFAULT_PARSER.parse(uri)
|
20
|
+
end
|
16
21
|
end
|
17
22
|
|
18
23
|
module Filerary
|
@@ -50,7 +55,9 @@ module Filerary
|
|
50
55
|
record.content =~ word
|
51
56
|
end
|
52
57
|
|
53
|
-
result.collect
|
58
|
+
result.collect do |record|
|
59
|
+
record._key.force_encoding(Encoding.find("locale"))
|
60
|
+
end
|
54
61
|
end
|
55
62
|
|
56
63
|
def cleanup
|
@@ -73,21 +80,17 @@ module Filerary
|
|
73
80
|
extractor = ChupaText::Extractor.new
|
74
81
|
extractor.apply_configuration(ChupaText::Configuration.default)
|
75
82
|
|
76
|
-
|
77
|
-
|
78
|
-
extractor.extract(URI.encode(path)) do |text_data|
|
79
|
-
text = text_data.body
|
80
|
-
end
|
81
|
-
rescue URI::InvalidURIError
|
82
|
-
return path
|
83
|
+
extractor.extract(path) do |text_data|
|
84
|
+
text = text_data.body
|
83
85
|
end
|
84
86
|
|
85
87
|
return path unless text
|
86
88
|
|
87
89
|
# TODO: I want to specify encoding in ChupaText side.
|
88
|
-
text.force_encoding("UTF-8")
|
89
|
-
return text if text.valid_encoding?
|
90
90
|
text.force_encoding(Encoding.default_external)
|
91
|
+
text.force_encoding("UTF-8") unless text.valid_encoding?
|
92
|
+
|
93
|
+
text
|
91
94
|
end
|
92
95
|
end
|
93
96
|
end
|
data/lib/filerary/version.rb
CHANGED
data/test/fixtures//343/203/236/343/203/253/343/203/201/343/203/220/343/202/244/343/203/210.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
UTF-8のファイル名
|
data/test/test-librarian.rb
CHANGED
@@ -26,22 +26,33 @@ class FileraryTest < Test::Unit::TestCase
|
|
26
26
|
end
|
27
27
|
|
28
28
|
class CollectTest < self
|
29
|
-
|
30
|
-
|
29
|
+
class ArgumentTest < self
|
30
|
+
def test_string
|
31
|
+
assert_equal([__FILE__], @librarian.collect(__FILE__))
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_array
|
35
|
+
assert_equal([__FILE__], @librarian.collect([__FILE__]))
|
36
|
+
end
|
31
37
|
end
|
32
38
|
|
33
|
-
|
34
|
-
|
39
|
+
class FilePathTest < self
|
40
|
+
def test_multibyte
|
41
|
+
path = File.join(@test_fixtures_dir, "マルチバイト.txt")
|
42
|
+
assert_equal([path], @librarian.collect(path))
|
43
|
+
end
|
35
44
|
end
|
36
45
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
46
|
+
class FileTypeTest < self
|
47
|
+
def test_pdf
|
48
|
+
path = File.join(@test_fixtures_dir, "test-pdf.pdf")
|
49
|
+
assert_equal([path], @librarian.collect(path))
|
50
|
+
end
|
41
51
|
|
42
|
-
|
43
|
-
|
44
|
-
|
52
|
+
def test_xls
|
53
|
+
path = File.join(@test_fixtures_dir, "test-excel.xls")
|
54
|
+
assert_equal([path], @librarian.collect(path))
|
55
|
+
end
|
45
56
|
end
|
46
57
|
end
|
47
58
|
|
@@ -56,26 +67,28 @@ class FileraryTest < Test::Unit::TestCase
|
|
56
67
|
assert_equal([], @librarian.search("AAA" * 5))
|
57
68
|
end
|
58
69
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
70
|
+
class FileTypeTest < self
|
71
|
+
def test_pdf
|
72
|
+
path = File.join(@test_fixtures_dir, "test-pdf.pdf")
|
73
|
+
@librarian.collect(path)
|
74
|
+
assert_equal([path], @librarian.search("秋"))
|
75
|
+
assert_equal([], @librarian.search("冬"))
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_xls
|
79
|
+
path = File.join(@test_fixtures_dir, "test-excel.xls")
|
80
|
+
@librarian.collect(path)
|
81
|
+
assert_equal([path], @librarian.search("Excel"))
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_xls_of_multibyte
|
85
|
+
# TODO: I want to detective.
|
86
|
+
omit_on_travis_ci
|
87
|
+
path = File.join(@test_fixtures_dir, "test-excel.xls")
|
88
|
+
@librarian.collect(path)
|
89
|
+
assert_equal([path], @librarian.search("表計算ソフト"))
|
90
|
+
assert_equal([], @librarian.search("文書作成ソフト"))
|
91
|
+
end
|
79
92
|
end
|
80
93
|
end
|
81
94
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filerary
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Masafumi Yokoyama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: grn_mini
|
@@ -174,6 +174,7 @@ files:
|
|
174
174
|
- test/filerary-test-utils.rb
|
175
175
|
- test/fixtures/test-excel.xls
|
176
176
|
- test/fixtures/test-pdf.pdf
|
177
|
+
- test/fixtures/マルチバイト.txt
|
177
178
|
- test/run-test.rb
|
178
179
|
- test/test-librarian.rb
|
179
180
|
homepage: https://github.com/myokoym/filerary
|
@@ -206,5 +207,7 @@ test_files:
|
|
206
207
|
- test/filerary-test-utils.rb
|
207
208
|
- test/fixtures/test-excel.xls
|
208
209
|
- test/fixtures/test-pdf.pdf
|
210
|
+
- test/fixtures/マルチバイト.txt
|
209
211
|
- test/run-test.rb
|
210
212
|
- test/test-librarian.rb
|
213
|
+
has_rdoc:
|