filerary 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5dada2e80e774e32a089e8a3797dbacb0275b4a6
|
4
|
+
data.tar.gz: ac63271136d1de8303d5a63d7cc6e8d69a9720f6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 104bfc5442ee05c60abbc20112c5e09a5644058a55c3dc471c497e7c3c1a12dfb0ec9a042e688a23a97fbbb932718308e52ce10bdc033302d08b4aafbf5842cd
|
7
|
+
data.tar.gz: 9d66a9d851a1856a07e3d64af1561669fa3b6d70f1641a05b0cb13d8229248678ca9048b27db89e88ee9ce330d0b2efdf31c4447502a1e4e7bb8b1990e52695a
|
data/lib/filerary/librarian.rb
CHANGED
@@ -10,9 +10,14 @@ module URI
|
|
10
10
|
class Generic
|
11
11
|
alias :__path__ :path
|
12
12
|
def path
|
13
|
-
URI.
|
13
|
+
URI.decode_www_form_component(__path__, Encoding.find("locale"))
|
14
14
|
end
|
15
15
|
end
|
16
|
+
|
17
|
+
def self.parse(uri)
|
18
|
+
uri = URI.encode_www_form_component(uri)
|
19
|
+
DEFAULT_PARSER.parse(uri)
|
20
|
+
end
|
16
21
|
end
|
17
22
|
|
18
23
|
module Filerary
|
@@ -50,7 +55,9 @@ module Filerary
|
|
50
55
|
record.content =~ word
|
51
56
|
end
|
52
57
|
|
53
|
-
result.collect
|
58
|
+
result.collect do |record|
|
59
|
+
record._key.force_encoding(Encoding.find("locale"))
|
60
|
+
end
|
54
61
|
end
|
55
62
|
|
56
63
|
def cleanup
|
@@ -73,21 +80,17 @@ module Filerary
|
|
73
80
|
extractor = ChupaText::Extractor.new
|
74
81
|
extractor.apply_configuration(ChupaText::Configuration.default)
|
75
82
|
|
76
|
-
|
77
|
-
|
78
|
-
extractor.extract(URI.encode(path)) do |text_data|
|
79
|
-
text = text_data.body
|
80
|
-
end
|
81
|
-
rescue URI::InvalidURIError
|
82
|
-
return path
|
83
|
+
extractor.extract(path) do |text_data|
|
84
|
+
text = text_data.body
|
83
85
|
end
|
84
86
|
|
85
87
|
return path unless text
|
86
88
|
|
87
89
|
# TODO: I want to specify encoding in ChupaText side.
|
88
|
-
text.force_encoding("UTF-8")
|
89
|
-
return text if text.valid_encoding?
|
90
90
|
text.force_encoding(Encoding.default_external)
|
91
|
+
text.force_encoding("UTF-8") unless text.valid_encoding?
|
92
|
+
|
93
|
+
text
|
91
94
|
end
|
92
95
|
end
|
93
96
|
end
|
data/lib/filerary/version.rb
CHANGED
data/test/fixtures//343/203/236/343/203/253/343/203/201/343/203/220/343/202/244/343/203/210.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
UTF-8のファイル名
|
data/test/test-librarian.rb
CHANGED
@@ -26,22 +26,33 @@ class FileraryTest < Test::Unit::TestCase
|
|
26
26
|
end
|
27
27
|
|
28
28
|
class CollectTest < self
|
29
|
-
|
30
|
-
|
29
|
+
class ArgumentTest < self
|
30
|
+
def test_string
|
31
|
+
assert_equal([__FILE__], @librarian.collect(__FILE__))
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_array
|
35
|
+
assert_equal([__FILE__], @librarian.collect([__FILE__]))
|
36
|
+
end
|
31
37
|
end
|
32
38
|
|
33
|
-
|
34
|
-
|
39
|
+
class FilePathTest < self
|
40
|
+
def test_multibyte
|
41
|
+
path = File.join(@test_fixtures_dir, "マルチバイト.txt")
|
42
|
+
assert_equal([path], @librarian.collect(path))
|
43
|
+
end
|
35
44
|
end
|
36
45
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
46
|
+
class FileTypeTest < self
|
47
|
+
def test_pdf
|
48
|
+
path = File.join(@test_fixtures_dir, "test-pdf.pdf")
|
49
|
+
assert_equal([path], @librarian.collect(path))
|
50
|
+
end
|
41
51
|
|
42
|
-
|
43
|
-
|
44
|
-
|
52
|
+
def test_xls
|
53
|
+
path = File.join(@test_fixtures_dir, "test-excel.xls")
|
54
|
+
assert_equal([path], @librarian.collect(path))
|
55
|
+
end
|
45
56
|
end
|
46
57
|
end
|
47
58
|
|
@@ -56,26 +67,28 @@ class FileraryTest < Test::Unit::TestCase
|
|
56
67
|
assert_equal([], @librarian.search("AAA" * 5))
|
57
68
|
end
|
58
69
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
70
|
+
class FileTypeTest < self
|
71
|
+
def test_pdf
|
72
|
+
path = File.join(@test_fixtures_dir, "test-pdf.pdf")
|
73
|
+
@librarian.collect(path)
|
74
|
+
assert_equal([path], @librarian.search("秋"))
|
75
|
+
assert_equal([], @librarian.search("冬"))
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_xls
|
79
|
+
path = File.join(@test_fixtures_dir, "test-excel.xls")
|
80
|
+
@librarian.collect(path)
|
81
|
+
assert_equal([path], @librarian.search("Excel"))
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_xls_of_multibyte
|
85
|
+
# TODO: I want to detective.
|
86
|
+
omit_on_travis_ci
|
87
|
+
path = File.join(@test_fixtures_dir, "test-excel.xls")
|
88
|
+
@librarian.collect(path)
|
89
|
+
assert_equal([path], @librarian.search("表計算ソフト"))
|
90
|
+
assert_equal([], @librarian.search("文書作成ソフト"))
|
91
|
+
end
|
79
92
|
end
|
80
93
|
end
|
81
94
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filerary
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Masafumi Yokoyama
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: grn_mini
|
@@ -174,6 +174,7 @@ files:
|
|
174
174
|
- test/filerary-test-utils.rb
|
175
175
|
- test/fixtures/test-excel.xls
|
176
176
|
- test/fixtures/test-pdf.pdf
|
177
|
+
- test/fixtures/マルチバイト.txt
|
177
178
|
- test/run-test.rb
|
178
179
|
- test/test-librarian.rb
|
179
180
|
homepage: https://github.com/myokoym/filerary
|
@@ -206,5 +207,7 @@ test_files:
|
|
206
207
|
- test/filerary-test-utils.rb
|
207
208
|
- test/fixtures/test-excel.xls
|
208
209
|
- test/fixtures/test-pdf.pdf
|
210
|
+
- test/fixtures/マルチバイト.txt
|
209
211
|
- test/run-test.rb
|
210
212
|
- test/test-librarian.rb
|
213
|
+
has_rdoc:
|