file_bsearch 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.ja.md +34 -12
- data/README.md +32 -12
- data/file_bsearch.gemspec +5 -1
- data/lib/file_bsearch/common.rb +15 -91
- data/lib/file_bsearch/get_lines.rb +12 -125
- data/lib/file_bsearch/main.rb +6 -4
- data/lib/file_bsearch/version.rb +1 -1
- data/spec/file_bsearch_spec.rb +112 -30
- data/spec/lib/my_spec_helper.rb +60 -12
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 789ee1ff24c686a8a3a55d6217bd5b20de1dec49
|
4
|
+
data.tar.gz: 003864105b6c297bb57688e64346cc5d8ffc85cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a506a023c0eff005ecba65ff699775bf14a35b4474077f1becb9f5d6f8ccf11b755ce553097db3abd48886c848118f10460892b09ba669fe11f8e57549e8bcf
|
7
|
+
data.tar.gz: cad31db3261cc1e89f2846665627a7c0f9efacab7b2a72aed34b66c81ec826876bdeaa6208b33eef1267fd418ad9c8861fe82fa69d41b9881b5c357df3df80ca
|
data/README.ja.md
CHANGED
@@ -13,44 +13,66 @@ gem install file_bsearch
|
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
|
+
### 引数
|
17
|
+
|
18
|
+
Usage のドキュメント内で使用する共通の変数について、以下に解説を記します。
|
19
|
+
|
20
|
+
#### encoding
|
21
|
+
|
22
|
+
文字コードの指定。
|
23
|
+
|
24
|
+
utf8, eucjp, jis, sjis か、それぞれの接頭辞を渡してください。省略するとアスキー文字で検索をおこないます。
|
25
|
+
|
26
|
+
#### path
|
27
|
+
|
28
|
+
ソート済みのテキストファイルのパス。
|
29
|
+
|
30
|
+
#### string
|
31
|
+
|
32
|
+
検索に用いる文字列。
|
33
|
+
|
34
|
+
#### prefix
|
35
|
+
|
36
|
+
検索に用いる、接頭辞を表す文字列。
|
37
|
+
|
16
38
|
### 指定文字列の行が、ファイル内に存在するか
|
17
39
|
|
18
40
|
```ruby
|
19
|
-
FileBsearch.exist?(path, string)
|
41
|
+
FileBsearch.exist?(path, string, encoding)
|
20
42
|
|
21
|
-
File.bsearch?(path, string)
|
43
|
+
File.bsearch?(path, string, encoding)
|
22
44
|
|
23
45
|
file = open(path)
|
24
|
-
file.bsearch?(string)
|
46
|
+
file.bsearch?(string, encoding)
|
25
47
|
```
|
26
48
|
|
27
|
-
存在する場合は true 、存在しない場合は false
|
49
|
+
存在する場合は true 、存在しない場合は false を返します。
|
28
50
|
|
29
51
|
### 指定文字列の行が、ファイル内のどの位置にあるか
|
30
52
|
|
31
53
|
```ruby
|
32
|
-
FileBsearch.index(path, string)
|
54
|
+
FileBsearch.index(path, string, encoding)
|
33
55
|
|
34
|
-
File.bsearch(path, string)
|
56
|
+
File.bsearch(path, string, encoding)
|
35
57
|
|
36
58
|
file = open(path)
|
37
|
-
file.bsearch(string)
|
59
|
+
file.bsearch(string, encoding)
|
38
60
|
```
|
39
61
|
|
40
|
-
|
62
|
+
存在する場合はその行の先頭のファイルポインタ位置の数値、存在しない場合は false を返します。
|
41
63
|
|
42
64
|
### 指定文字列から始まる行の取得
|
43
65
|
|
44
66
|
```ruby
|
45
|
-
FileBsearch.get_lines(path, prefix)
|
67
|
+
FileBsearch.get_lines(path, prefix, encoding)
|
46
68
|
|
47
|
-
File.bsearch_lines(path, prefix)
|
69
|
+
File.bsearch_lines(path, prefix, encoding)
|
48
70
|
|
49
71
|
file = open(path)
|
50
|
-
file.bsearch_lines(prefix)
|
72
|
+
file.bsearch_lines(prefix, encoding)
|
51
73
|
```
|
52
74
|
|
53
|
-
|
75
|
+
存在する場合はそれらの行の文字列を含んだ配列、存在しない場合は空の配列を返します。
|
54
76
|
|
55
77
|
## Contributing
|
56
78
|
|
data/README.md
CHANGED
@@ -13,15 +13,35 @@ gem install file_bsearch
|
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
|
+
### common arguments
|
17
|
+
|
18
|
+
common arguments used in this usage document. the meaning of each argument is as follows.
|
19
|
+
|
20
|
+
#### encoding
|
21
|
+
|
22
|
+
the character encoding. you can pass a String object that is 'utf8', 'eucjp', 'jis', 'sjis', or a prefix of one of them. if you pass nil or omit it, the search is done as ascii characters.
|
23
|
+
|
24
|
+
#### path
|
25
|
+
|
26
|
+
path to the sorted text file.
|
27
|
+
|
28
|
+
#### string
|
29
|
+
|
30
|
+
string to search for.
|
31
|
+
|
32
|
+
#### prefix
|
33
|
+
|
34
|
+
prefix string to search for.
|
35
|
+
|
16
36
|
### check whether a line identical to the passed string exists
|
17
37
|
|
18
38
|
```ruby
|
19
|
-
FileBsearch.exist?(path, string)
|
39
|
+
FileBsearch.exist?(path, string, encoding)
|
20
40
|
|
21
|
-
File.bsearch?(path, string)
|
41
|
+
File.bsearch?(path, string, encoding)
|
22
42
|
|
23
43
|
file = open(path)
|
24
|
-
file.bsearch?(string)
|
44
|
+
file.bsearch?(string, encoding)
|
25
45
|
```
|
26
46
|
|
27
47
|
if exists, return true.
|
@@ -30,29 +50,29 @@ if not exists, return false.
|
|
30
50
|
### search position in file for the line that is identical with passed string
|
31
51
|
|
32
52
|
```ruby
|
33
|
-
FileBsearch.index(path, string)
|
53
|
+
FileBsearch.index(path, string, encoding)
|
34
54
|
|
35
|
-
File.bsearch(path, string)
|
55
|
+
File.bsearch(path, string, encoding)
|
36
56
|
|
37
57
|
file = open(path)
|
38
|
-
file.bsearch(string)
|
58
|
+
file.bsearch(string, encoding)
|
39
59
|
```
|
40
60
|
|
41
|
-
if exists, return Integer object that
|
61
|
+
if exists, return an Integer object holding the file pointer position of the head of the matched line.
|
42
62
|
if not exists, return false.
|
43
63
|
|
44
64
|
### get lines that start with the passed prefix
|
45
65
|
|
46
66
|
```ruby
|
47
|
-
FileBsearch.get_lines(path, prefix)
|
67
|
+
FileBsearch.get_lines(path, prefix, encoding)
|
48
68
|
|
49
|
-
File.bsearch_lines(path, prefix)
|
69
|
+
File.bsearch_lines(path, prefix, encoding)
|
50
70
|
|
51
71
|
file = open(path)
|
52
|
-
file.bsearch_lines(prefix)
|
72
|
+
file.bsearch_lines(prefix, encoding)
|
53
73
|
```
|
54
74
|
|
55
|
-
if exists, return Array object that
|
75
|
+
if exists, return an Array object containing the matched lines.
|
56
76
|
if not exists, return empty Array object.
|
57
77
|
|
58
78
|
## Contributing
|
@@ -65,4 +85,4 @@ if not exists, return empty Array object.
|
|
65
85
|
|
66
86
|
## Author
|
67
87
|
|
68
|
-
[indeep-xyz](http://indeep.xyz/)
|
88
|
+
[indeep-xyz](http://indeep.xyz/) (Japanese language)
|
data/file_bsearch.gemspec
CHANGED
@@ -9,7 +9,9 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["indeep-xyz"]
|
10
10
|
spec.email = ["indeep.xyz@gmail.com"]
|
11
11
|
spec.summary = %q{binary search for sorted text file.}
|
12
|
-
spec.description =
|
12
|
+
spec.description = <<EOS
|
13
|
+
binary search for sorted text file. it is effective when file size is bigger.
|
14
|
+
EOS
|
13
15
|
spec.homepage = "https://github.com/indeep-xyz/ruby-file-bsearch/"
|
14
16
|
spec.license = "MIT"
|
15
17
|
|
@@ -18,6 +20,8 @@ Gem::Specification.new do |spec|
|
|
18
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
21
|
spec.require_paths = ["lib"]
|
20
22
|
|
23
|
+
spec.add_runtime_dependency "file_char_licker", "~> 0.5"
|
24
|
+
|
21
25
|
spec.add_development_dependency "bundler", "~> 1.7"
|
22
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
23
27
|
spec.add_development_dependency "rspec"
|
data/lib/file_bsearch/common.rb
CHANGED
@@ -1,87 +1,9 @@
|
|
1
|
-
|
2
|
-
class << self
|
3
|
-
|
4
|
-
private
|
5
|
-
|
6
|
-
# get backward lines
|
7
|
-
#
|
8
|
-
# args
|
9
|
-
# file ... File object
|
10
|
-
# #pos value should be at SOL (Start Of Line)
|
11
|
-
# size ... indication of reading size
|
12
|
-
#
|
13
|
-
# returner
|
14
|
-
# String object as lines
|
15
|
-
def backward_lines(file, size = 512)
|
16
|
-
|
17
|
-
result = ""
|
18
|
-
|
19
|
-
while file.pos > 0
|
20
|
-
|
21
|
-
file.seek(-1, IO::SEEK_CUR)
|
22
|
-
char = file.getc
|
23
|
-
|
24
|
-
if char.match(/[\r\n]/) && result.size > size
|
25
|
-
break
|
26
|
-
else
|
27
|
-
result.insert(0, char)
|
28
|
-
file.seek(-1, IO::SEEK_CUR)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
result
|
33
|
-
end
|
34
|
-
|
35
|
-
# get a line string at current position
|
36
|
-
#
|
37
|
-
# args
|
38
|
-
# file ... File object
|
39
|
-
def current_line(file)
|
40
|
-
|
41
|
-
if file.pos > 0
|
42
|
-
|
43
|
-
# move pointer to before character
|
44
|
-
file.seek(-1, IO::SEEK_CUR)
|
1
|
+
# coding: utf-8
|
45
2
|
|
46
|
-
|
47
|
-
# - move pointer until reach to EOL of before line.
|
48
|
-
until file.getc.match(/[\n\r]/)
|
3
|
+
require "file_char_licker"
|
49
4
|
|
50
|
-
|
51
|
-
|
52
|
-
file.seek(-2, IO::SEEK_CUR)
|
53
|
-
else
|
54
|
-
|
55
|
-
# if EOS, break
|
56
|
-
file.rewind
|
57
|
-
break
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
file.gets
|
63
|
-
end
|
64
|
-
|
65
|
-
# get forward lines
|
66
|
-
#
|
67
|
-
# args
|
68
|
-
# file ... File object
|
69
|
-
# #pos value should be at SOL (Start Of Line)
|
70
|
-
# size ... indication of reading size
|
71
|
-
#
|
72
|
-
# returner
|
73
|
-
# String object as lines
|
74
|
-
def forward_lines(file, size = 512)
|
75
|
-
|
76
|
-
result = ""
|
77
|
-
|
78
|
-
while result.size < size && !file.eof?
|
79
|
-
|
80
|
-
result += file.gets
|
81
|
-
end
|
82
|
-
|
83
|
-
result
|
84
|
-
end
|
5
|
+
module FileBsearch
|
6
|
+
class << self
|
85
7
|
|
86
8
|
# args
|
87
9
|
# file ... File object
|
@@ -107,7 +29,7 @@ module FileBsearch
|
|
107
29
|
# - if EOF, not found
|
108
30
|
# - if pointer did not move, not found
|
109
31
|
while max > file.pos \
|
110
|
-
&& (line = current_line
|
32
|
+
&& (line = file.current_line) \
|
111
33
|
&& file.pos != old_pos
|
112
34
|
|
113
35
|
old_pos = file.pos
|
@@ -115,8 +37,8 @@ module FileBsearch
|
|
115
37
|
|
116
38
|
# for debug
|
117
39
|
# p "-- #{needle}, " + {
|
118
|
-
# code: code,
|
119
40
|
# text: line.chomp,
|
41
|
+
# code: code,
|
120
42
|
# min: min,
|
121
43
|
# max: max,
|
122
44
|
# pos: file.pos
|
@@ -128,8 +50,8 @@ module FileBsearch
|
|
128
50
|
# - if match, return
|
129
51
|
case code
|
130
52
|
when -1 then min = file.pos - 1
|
131
|
-
when 1 then max = file.pos - line.
|
132
|
-
else return file.pos - line.
|
53
|
+
when 1 then max = file.pos - line.bytesize
|
54
|
+
else return file.pos - line.bytesize
|
133
55
|
end
|
134
56
|
|
135
57
|
# move to mid point
|
@@ -140,16 +62,18 @@ module FileBsearch
|
|
140
62
|
end
|
141
63
|
|
142
64
|
# args
|
143
|
-
#
|
65
|
+
# file ... File object || path for String object
|
144
66
|
#
|
145
67
|
# returner
|
146
68
|
# File object
|
147
|
-
def
|
69
|
+
def init_file(file, encoding)
|
148
70
|
|
149
|
-
# check the
|
71
|
+
# check the file argument
|
150
72
|
# - if not File object, open it as String for file path
|
151
|
-
|
152
|
-
|
73
|
+
file = File.open(file.to_s) unless file.is_a?(File)
|
74
|
+
FileCharLicker.attach(file, encoding)
|
75
|
+
|
76
|
+
file
|
153
77
|
end
|
154
78
|
end
|
155
79
|
end
|
@@ -1,22 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
module FileBsearch
|
2
4
|
class << self
|
3
5
|
|
4
6
|
# returner
|
5
7
|
# Array object
|
6
|
-
def get_lines(
|
8
|
+
def get_lines(file, prefix, encoding = nil, *args)
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
file = init_file(file, encoding)
|
11
|
+
prefix = prefix.to_s
|
10
12
|
|
11
13
|
# pass to scan method
|
12
|
-
pos =
|
13
|
-
line[0, prefix.size] <=> prefix
|
14
|
-
end
|
14
|
+
pos = scan_around_lines_seed(file, prefix, *args)
|
15
15
|
|
16
16
|
if pos.is_a?(Integer)
|
17
17
|
|
18
|
-
|
19
|
-
|
18
|
+
file.seek(pos)
|
19
|
+
lines = file.around_lines(/^#{Regexp.escape(prefix)}/)
|
20
|
+
result = lines.chomp.split(/[\r\n]+/) if lines.bytesize > 0
|
20
21
|
end
|
21
22
|
|
22
23
|
result || []
|
@@ -24,124 +25,10 @@ module FileBsearch
|
|
24
25
|
|
25
26
|
private
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
# file ... File object
|
31
|
-
# pos ... starting point for file#pos
|
32
|
-
# require to be within contiguous range
|
33
|
-
# needle ... RegExp object for contiguous check
|
34
|
-
#
|
35
|
-
# returner
|
36
|
-
# Array object
|
37
|
-
def around_lines(file, pos, needle)
|
38
|
-
|
39
|
-
# scan
|
40
|
-
min = scan_contiguous_min(file, pos, needle) || pos
|
41
|
-
max = scan_contiguous_max(file, pos, needle) || pos
|
42
|
-
|
43
|
-
# read
|
44
|
-
file.seek(min)
|
45
|
-
lines = file.read(max - min).chomp
|
46
|
-
|
47
|
-
# return
|
48
|
-
return lines.split(/[\r\n]+/) if lines.size > 0
|
49
|
-
[]
|
50
|
-
end
|
51
|
-
|
52
|
-
# scan min file#pos of contiguous range.
|
53
|
-
#
|
54
|
-
# args
|
55
|
-
# file ... File object
|
56
|
-
# pos ... starting point for file#pos
|
57
|
-
# require to be within contiguous range
|
58
|
-
# needle ... RegExp object for contiguous check
|
59
|
-
# step ... buffer size for check processing
|
60
|
-
#
|
61
|
-
# returner
|
62
|
-
# Integer object for file#pos
|
63
|
-
# EOS of matched line
|
64
|
-
def scan_contiguous_min(file, pos, needle, step = 512)
|
65
|
-
|
66
|
-
file.pos = pos
|
67
|
-
min = nil
|
68
|
-
|
69
|
-
loop do
|
70
|
-
|
71
|
-
lines = backward_lines(file, step)
|
72
|
-
lines_pos = lines.index(needle)
|
73
|
-
file_pos = file.pos
|
74
|
-
|
75
|
-
# for debug
|
76
|
-
# p [
|
77
|
-
# lines: lines,
|
78
|
-
# lines_pos: lines_pos,
|
79
|
-
# file_pos: file_pos
|
80
|
-
# ].to_s
|
81
|
-
# sleep 0.05
|
82
|
-
|
83
|
-
if lines_pos.nil?
|
84
|
-
break
|
85
|
-
else
|
86
|
-
min = file_pos + lines_pos
|
87
|
-
break if lines_pos > 0 || file_pos < 1
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
min
|
92
|
-
end
|
93
|
-
|
94
|
-
# scan max file#pos of contiguous range.
|
95
|
-
#
|
96
|
-
# args
|
97
|
-
# file ... File object
|
98
|
-
# pos ... starting point for file#pos
|
99
|
-
# require to be within contiguous range
|
100
|
-
# needle ... RegExp object for contiguous check
|
101
|
-
# step ... buffer size for check processing
|
102
|
-
#
|
103
|
-
# returner
|
104
|
-
# Integer object for file#pos
|
105
|
-
# EOL of matched line
|
106
|
-
def scan_contiguous_max(file, pos, needle, step = 512)
|
107
|
-
|
108
|
-
file.pos = pos
|
109
|
-
max = nil
|
110
|
-
|
111
|
-
loop do
|
112
|
-
|
113
|
-
# file#pos before #forward_lines
|
114
|
-
pos_old = file.pos
|
115
|
-
|
116
|
-
lines = forward_lines(file, step)
|
117
|
-
lines_pos = lines.rindex(needle)
|
118
|
-
|
119
|
-
# for debug
|
120
|
-
# p [
|
121
|
-
# lines: lines,
|
122
|
-
# lines_pos: lines_pos,
|
123
|
-
# file_pos: file.pos
|
124
|
-
# ].to_s
|
125
|
-
# sleep 0.05
|
126
|
-
|
127
|
-
# if did not match needle
|
128
|
-
# - returner is last set value to 'max'
|
129
|
-
break if lines_pos.nil?
|
130
|
-
|
131
|
-
lines_end_pos = lines.index(/([\r\n]+?)/, lines_pos)
|
132
|
-
|
133
|
-
if file.eof?
|
134
|
-
max = (lines_end_pos.nil?) ? file.size : pos_old + lines_end_pos
|
135
|
-
break
|
136
|
-
else
|
137
|
-
max = pos_old + lines_end_pos
|
138
|
-
|
139
|
-
break if lines_end_pos < lines.size - 1
|
140
|
-
end
|
141
|
-
|
28
|
+
def scan_around_lines_seed(file, prefix, *args)
|
29
|
+
scan(file, nil, *args) do |line|
|
30
|
+
line[0, prefix.size] <=> prefix
|
142
31
|
end
|
143
|
-
|
144
|
-
max
|
145
32
|
end
|
146
33
|
end
|
147
34
|
end
|
data/lib/file_bsearch/main.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
module FileBsearch
|
2
4
|
class << self
|
3
5
|
|
@@ -14,16 +16,16 @@ module FileBsearch
|
|
14
16
|
end
|
15
17
|
|
16
18
|
# args
|
17
|
-
#
|
19
|
+
# file ... File object || path for String object
|
18
20
|
#
|
19
21
|
# returner
|
20
22
|
# position in target file
|
21
|
-
def index(
|
23
|
+
def index(file, needle, encoding = nil, *args, &block)
|
22
24
|
|
23
|
-
file =
|
25
|
+
file = init_file(file, encoding)
|
24
26
|
|
25
27
|
# pass to scan method
|
26
|
-
scan(file, *args, &block)
|
28
|
+
scan(file, needle, *args, &block)
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
data/lib/file_bsearch/version.rb
CHANGED
data/spec/file_bsearch_spec.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
|
3
5
|
describe FileBsearch do
|
4
6
|
|
5
|
-
let(:path) { '/tmp/file_bsearch_sample.csv' }
|
6
|
-
let(:correct) { '9' }
|
7
|
-
let(:incorrect) { '!!!!!' }
|
8
|
-
|
9
|
-
let!(:helper) { MySpecHelper.new(path) }
|
10
|
-
|
11
7
|
it 'has a version number' do
|
12
8
|
expect(FileBsearch::VERSION).not_to be nil
|
13
9
|
end
|
10
|
+
end
|
11
|
+
|
12
|
+
describe '<ASCII FILE>' do
|
13
|
+
|
14
|
+
let!(:helper) { MySpecHelper.new }
|
15
|
+
let(:path) { MySpecHelper::ASCII_SAMPLE_PATH }
|
16
|
+
let(:incorrect) { MySpecHelper::INCORRECT }
|
14
17
|
|
15
18
|
describe 'module' do
|
16
19
|
|
@@ -18,7 +21,8 @@ describe FileBsearch do
|
|
18
21
|
|
19
22
|
it 'when a record is exists' do
|
20
23
|
|
21
|
-
|
24
|
+
arg = MySpecHelper::ASCII_ARG
|
25
|
+
result = FileBsearch.exist?(path, arg)
|
22
26
|
|
23
27
|
expect(result).to eq(true)
|
24
28
|
end
|
@@ -26,7 +30,6 @@ describe FileBsearch do
|
|
26
30
|
it 'when a record is not exist' do
|
27
31
|
|
28
32
|
result = FileBsearch.exist?(path, incorrect)
|
29
|
-
|
30
33
|
expect(result).to eq(false)
|
31
34
|
end
|
32
35
|
|
@@ -34,8 +37,8 @@ describe FileBsearch do
|
|
34
37
|
|
35
38
|
File.foreach(path) do |line|
|
36
39
|
|
37
|
-
|
38
|
-
result = FileBsearch.exist?(path,
|
40
|
+
arg = line.chomp
|
41
|
+
result = FileBsearch.exist?(path, arg)
|
39
42
|
|
40
43
|
expect(result).to eq(true)
|
41
44
|
end
|
@@ -46,10 +49,11 @@ describe FileBsearch do
|
|
46
49
|
|
47
50
|
it 'when returner is number as position in the file' do
|
48
51
|
|
49
|
-
|
50
|
-
|
52
|
+
arg = MySpecHelper::ASCII_ARG
|
53
|
+
pos = FileBsearch.index(path, arg)
|
51
54
|
|
52
|
-
|
55
|
+
result = helper.ascii_correct?(pos, arg)
|
56
|
+
expect(result).to eq(true)
|
53
57
|
end
|
54
58
|
|
55
59
|
it 'when returner is false' do
|
@@ -62,15 +66,16 @@ describe FileBsearch do
|
|
62
66
|
describe '#get_lines' do
|
63
67
|
it 'when lines is exist that with the prefix' do
|
64
68
|
|
65
|
-
|
69
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
70
|
+
correct = MySpecHelper::ASCII_LIST_CORRECT
|
66
71
|
|
67
|
-
|
72
|
+
result = FileBsearch.get_lines(path, arg)
|
73
|
+
expect(result).to eq(correct)
|
68
74
|
end
|
69
75
|
|
70
76
|
it 'when lines is not exist that with the prefix' do
|
71
77
|
|
72
|
-
result = FileBsearch.get_lines(path,
|
73
|
-
|
78
|
+
result = FileBsearch.get_lines(path, incorrect)
|
74
79
|
expect(result).to eq([])
|
75
80
|
end
|
76
81
|
end
|
@@ -78,23 +83,25 @@ describe FileBsearch do
|
|
78
83
|
|
79
84
|
describe 'instance method' do
|
80
85
|
|
81
|
-
let(:file)
|
86
|
+
let(:file) { open(path) }
|
82
87
|
|
83
88
|
describe '#bsearch' do
|
84
89
|
|
85
90
|
it 'when returner is number as position in the file' do
|
86
91
|
|
87
|
-
|
88
|
-
|
92
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
93
|
+
pos = file.bsearch(arg)
|
94
|
+
result = helper.ascii_correct?(pos, arg)
|
89
95
|
|
90
|
-
expect(
|
96
|
+
expect(result).to eq(true)
|
91
97
|
end
|
92
98
|
end
|
93
99
|
|
94
100
|
describe '#bsearch?' do
|
95
101
|
it 'when a record is exists' do
|
96
102
|
|
97
|
-
|
103
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
104
|
+
result = file.bsearch?(arg)
|
98
105
|
|
99
106
|
expect(result).to eq(true)
|
100
107
|
end
|
@@ -103,9 +110,11 @@ describe FileBsearch do
|
|
103
110
|
describe '#bsearch_lines' do
|
104
111
|
it 'when lines is exist that with the prefix' do
|
105
112
|
|
106
|
-
|
113
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
114
|
+
correct = MySpecHelper::ASCII_LIST_CORRECT
|
107
115
|
|
108
|
-
|
116
|
+
result = file.bsearch_lines(arg)
|
117
|
+
expect(result).to eq(correct)
|
109
118
|
end
|
110
119
|
end
|
111
120
|
end
|
@@ -116,17 +125,19 @@ describe FileBsearch do
|
|
116
125
|
|
117
126
|
it 'when returner is number as position in the file' do
|
118
127
|
|
119
|
-
|
120
|
-
|
128
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
129
|
+
pos = File.bsearch(path, arg)
|
130
|
+
result = helper.ascii_correct?(pos, arg)
|
121
131
|
|
122
|
-
expect(
|
132
|
+
expect(result).to eq(true)
|
123
133
|
end
|
124
134
|
end
|
125
135
|
|
126
136
|
describe '#bsearch?' do
|
127
137
|
it 'when a record is exists' do
|
128
138
|
|
129
|
-
|
139
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
140
|
+
result = File.bsearch?(path, arg)
|
130
141
|
|
131
142
|
expect(result).to eq(true)
|
132
143
|
end
|
@@ -135,9 +146,80 @@ describe FileBsearch do
|
|
135
146
|
describe '#bsearch_lines' do
|
136
147
|
it 'when lines is exist that with the prefix' do
|
137
148
|
|
138
|
-
|
149
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
150
|
+
correct = MySpecHelper::ASCII_LIST_CORRECT
|
151
|
+
|
152
|
+
result = File.bsearch_lines(path, arg)
|
153
|
+
expect(result).to eq(correct)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe '<UTF-8 FILE>' do
|
160
|
+
|
161
|
+
let!(:helper) { MySpecHelper.new }
|
162
|
+
let(:path) { MySpecHelper::MULTIBYTE_SAMPLE_PATH }
|
163
|
+
let(:incorrect) { MySpecHelper::INCORRECT }
|
164
|
+
let(:arg) { MySpecHelper::MULTIBYTE_ARG }
|
165
|
+
let(:list_arg) { MySpecHelper::MULTIBYTE_LIST_ARG }
|
166
|
+
|
167
|
+
describe 'module' do
|
168
|
+
|
169
|
+
describe '#exist?' do
|
170
|
+
|
171
|
+
it 'when a record is exists' do
|
172
|
+
|
173
|
+
result = FileBsearch.exist?(path, arg, 'utf-8')
|
174
|
+
expect(result).to eq(true)
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'when a record is not exist' do
|
178
|
+
|
179
|
+
result = FileBsearch.exist?(path, incorrect, 'utf-8')
|
180
|
+
expect(result).to eq(false)
|
181
|
+
end
|
139
182
|
|
140
|
-
|
183
|
+
it 'when all records are exist' do
|
184
|
+
|
185
|
+
File.foreach(path) do |line|
|
186
|
+
|
187
|
+
arg = line.chomp
|
188
|
+
result = FileBsearch.exist?(path, arg, 'utf-8')
|
189
|
+
|
190
|
+
expect(result).to eq(true)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
describe '#index' do
|
196
|
+
|
197
|
+
it 'when returner is number as position in the file' do
|
198
|
+
|
199
|
+
pos = FileBsearch.index(path, arg, 'utf-8')
|
200
|
+
result = helper.multibyte_correct?(pos, arg)
|
201
|
+
expect(result).to eq(true)
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'when returner is false' do
|
205
|
+
|
206
|
+
result = FileBsearch.index(path, incorrect, 'utf-8')
|
207
|
+
expect(result).to eq(false)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
describe '#get_lines' do
|
212
|
+
it 'when lines is exist that with the prefix' do
|
213
|
+
|
214
|
+
correct = MySpecHelper::MULTIBYTE_LIST_CORRECT
|
215
|
+
result = FileBsearch.get_lines(path, list_arg, 'utf-8')
|
216
|
+
expect(result).to eq(correct)
|
217
|
+
end
|
218
|
+
|
219
|
+
it 'when lines is not exist that with the prefix' do
|
220
|
+
|
221
|
+
result = FileBsearch.get_lines(path, incorrect, 'utf-8')
|
222
|
+
expect(result).to eq([])
|
141
223
|
end
|
142
224
|
end
|
143
225
|
end
|
data/spec/lib/my_spec_helper.rb
CHANGED
@@ -1,32 +1,80 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
class MySpecHelper
|
2
4
|
|
3
5
|
attr_reader :path
|
4
6
|
|
5
|
-
|
7
|
+
ASCII_SAMPLE_PATH = '/tmp/file_bsearch_sample_ascii.csv'
|
8
|
+
MULTIBYTE_SAMPLE_PATH = '/tmp/file_bsearch_sample_multibyte.csv'
|
9
|
+
|
10
|
+
ASCII_ARG = '9'
|
11
|
+
ASCII_LIST_ARG = '1000'
|
12
|
+
ASCII_LIST_CORRECT = %w{1000 10000}
|
13
|
+
INCORRECT = '!!!!!'
|
14
|
+
|
15
|
+
MULTIBYTE_ARG = 'ああ'
|
16
|
+
MULTIBYTE_LIST_ARG = 'あ'
|
17
|
+
MULTIBYTE_LIST_CORRECT = ["あぁ", "ああ", "あぃ", "あい", "あぅ", "あう", "あぇ", "あえ", "あぉ", "あお", "あか", "あが", "あき", "あぎ", "あく", "あぐ", "あけ", "あげ", "あこ", "あご", "あさ", "あざ", "あし", "あじ", "あす", "あず", "あせ", "あぜ", "あそ", "あぞ", "あた", "あだ", "あち", "あぢ", "あっ", "あつ", "あづ", "あて", "あで", "あと", "あど", "あな", "あに", "あぬ", "あね", "あの", "あは", "あば", "あぱ", "あひ", "あび", "あぴ", "あふ", "あぶ", "あぷ", "あへ", "あべ", "あぺ", "あほ", "あぼ", "あぽ", "あま", "あみ", "あむ", "あめ", "あも", "あゃ", "あや", "あゅ", "あゆ", "あょ", "あよ", "あら", "あり", "ある", "あれ", "あろ", "あゎ", "あわ", "あゐ", "あゑ", "あを", "あん", "あゔ", "あゕ", "あゖ"]
|
6
18
|
|
7
|
-
|
19
|
+
def initialize
|
8
20
|
|
9
|
-
|
21
|
+
init_ascii_sample
|
22
|
+
init_multibyte_sample
|
10
23
|
end
|
11
24
|
|
12
|
-
def
|
25
|
+
def init_ascii_sample
|
26
|
+
|
27
|
+
path = ASCII_SAMPLE_PATH
|
13
28
|
|
14
29
|
# guard
|
15
30
|
# - if file is already exists, do not
|
16
|
-
return nil if File.exist?(
|
31
|
+
return nil if File.exist?(path)
|
17
32
|
|
18
33
|
# create sorted string
|
19
|
-
str = (0..10_000)
|
34
|
+
str = create_lines(0..10_000)
|
20
35
|
|
21
|
-
IO.write(
|
36
|
+
IO.write(path, str)
|
22
37
|
end
|
23
38
|
|
24
|
-
|
39
|
+
def init_multibyte_sample
|
40
|
+
|
41
|
+
path = MULTIBYTE_SAMPLE_PATH
|
42
|
+
|
43
|
+
# guard
|
44
|
+
# - if file is already exists, do not
|
45
|
+
return nil if File.exist?(path)
|
46
|
+
|
47
|
+
# create sorted string
|
48
|
+
str = create_lines('ぁぁ'..'んん')
|
49
|
+
|
50
|
+
IO.write(path, str)
|
51
|
+
end
|
52
|
+
|
53
|
+
# check correct for ascii file
|
54
|
+
#
|
55
|
+
# args
|
56
|
+
# pos ... Integer object for position in file
|
57
|
+
# (returner of FileBsearch#index)
|
58
|
+
# str ... String object for comparison
|
59
|
+
def ascii_correct?(pos, str)
|
60
|
+
str == IO.read(ASCII_SAMPLE_PATH, str.bytesize, pos)
|
61
|
+
end
|
62
|
+
|
63
|
+
# check correct for multibyte file
|
25
64
|
#
|
26
65
|
# args
|
27
|
-
#
|
28
|
-
#
|
29
|
-
|
30
|
-
|
66
|
+
# pos ... Integer object for position in file
|
67
|
+
# (returner of FileBsearch#index)
|
68
|
+
# str ... String object for comparison
|
69
|
+
def multibyte_correct?(pos, str)
|
70
|
+
|
71
|
+
read_str = IO.read(MULTIBYTE_SAMPLE_PATH, str.bytesize, pos)
|
72
|
+
str == read_str.toutf8
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def create_lines(range)
|
78
|
+
range.to_a.sort { |a, b| a.to_s <=> b.to_s }.join("\n")
|
31
79
|
end
|
32
80
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file_bsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- indeep-xyz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: file_char_licker
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.5'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,8 +80,8 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
69
|
-
description:
|
70
|
-
bigger.
|
83
|
+
description: |
|
84
|
+
binary search for sorted text file. it is effective when file size is bigger.
|
71
85
|
email:
|
72
86
|
- indeep.xyz@gmail.com
|
73
87
|
executables: []
|