file_bsearch 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.ja.md +34 -12
- data/README.md +32 -12
- data/file_bsearch.gemspec +5 -1
- data/lib/file_bsearch/common.rb +15 -91
- data/lib/file_bsearch/get_lines.rb +12 -125
- data/lib/file_bsearch/main.rb +6 -4
- data/lib/file_bsearch/version.rb +1 -1
- data/spec/file_bsearch_spec.rb +112 -30
- data/spec/lib/my_spec_helper.rb +60 -12
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 789ee1ff24c686a8a3a55d6217bd5b20de1dec49
|
4
|
+
data.tar.gz: 003864105b6c297bb57688e64346cc5d8ffc85cb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a506a023c0eff005ecba65ff699775bf14a35b4474077f1becb9f5d6f8ccf11b755ce553097db3abd48886c848118f10460892b09ba669fe11f8e57549e8bcf
|
7
|
+
data.tar.gz: cad31db3261cc1e89f2846665627a7c0f9efacab7b2a72aed34b66c81ec826876bdeaa6208b33eef1267fd418ad9c8861fe82fa69d41b9881b5c357df3df80ca
|
data/README.ja.md
CHANGED
@@ -13,44 +13,66 @@ gem install file_bsearch
|
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
|
+
### 引数
|
17
|
+
|
18
|
+
Usage のドキュメント内で使用する共通の変数について、以下に解説を記します。
|
19
|
+
|
20
|
+
#### encoding
|
21
|
+
|
22
|
+
文字コードの指定。
|
23
|
+
|
24
|
+
utf8, eucjp, jis, sjis か、それぞれの接頭辞を渡してください。省略するとアスキー文字で検索をおこないます。
|
25
|
+
|
26
|
+
#### path
|
27
|
+
|
28
|
+
ソート済みのテキストファイルのパス。
|
29
|
+
|
30
|
+
#### string
|
31
|
+
|
32
|
+
検索に用いる文字列。
|
33
|
+
|
34
|
+
#### prefix
|
35
|
+
|
36
|
+
検索に用いる、接頭辞を表す文字列。
|
37
|
+
|
16
38
|
### 指定文字列の行が、ファイル内に存在するか
|
17
39
|
|
18
40
|
```ruby
|
19
|
-
FileBsearch.exist?(path, string)
|
41
|
+
FileBsearch.exist?(path, string, encoding)
|
20
42
|
|
21
|
-
File.bsearch?(path, string)
|
43
|
+
File.bsearch?(path, string, encoding)
|
22
44
|
|
23
45
|
file = open(path)
|
24
|
-
file.bsearch?(string)
|
46
|
+
file.bsearch?(string, encoding)
|
25
47
|
```
|
26
48
|
|
27
|
-
存在する場合は true 、存在しない場合は false
|
49
|
+
存在する場合は true 、存在しない場合は false を返します。
|
28
50
|
|
29
51
|
### 指定文字列の行が、ファイル内のどの位置にあるか
|
30
52
|
|
31
53
|
```ruby
|
32
|
-
FileBsearch.index(path, string)
|
54
|
+
FileBsearch.index(path, string, encoding)
|
33
55
|
|
34
|
-
File.bsearch(path, string)
|
56
|
+
File.bsearch(path, string, encoding)
|
35
57
|
|
36
58
|
file = open(path)
|
37
|
-
file.bsearch(string)
|
59
|
+
file.bsearch(string, encoding)
|
38
60
|
```
|
39
61
|
|
40
|
-
|
62
|
+
存在する場合はその行の先頭のファイルポインタ位置の数値、存在しない場合は false を返します。
|
41
63
|
|
42
64
|
### 指定文字列から始まる行の取得
|
43
65
|
|
44
66
|
```ruby
|
45
|
-
FileBsearch.get_lines(path, prefix)
|
67
|
+
FileBsearch.get_lines(path, prefix, encoding)
|
46
68
|
|
47
|
-
File.bsearch_lines(path, prefix)
|
69
|
+
File.bsearch_lines(path, prefix, encoding)
|
48
70
|
|
49
71
|
file = open(path)
|
50
|
-
file.bsearch_lines(prefix)
|
72
|
+
file.bsearch_lines(prefix, encoding)
|
51
73
|
```
|
52
74
|
|
53
|
-
|
75
|
+
存在する場合はそれらの行の文字列を含んだ配列、存在しない場合は空の配列を返します。
|
54
76
|
|
55
77
|
## Contributing
|
56
78
|
|
data/README.md
CHANGED
@@ -13,15 +13,35 @@ gem install file_bsearch
|
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
|
+
### common arguments
|
17
|
+
|
18
|
+
Common arguments used throughout this usage document. The meaning of each argument is as follows.
|
19
|
+
|
20
|
+
#### encoding
|
21
|
+
|
22
|
+
Specifies the character encoding. You can pass a String that is 'utf8', 'eucjp', 'jis', 'sjis', or a prefix of one of them. If you pass nil or omit the argument, the search is done as ASCII characters.
|
23
|
+
|
24
|
+
#### path
|
25
|
+
|
26
|
+
Path to a sorted text file.
|
27
|
+
|
28
|
+
#### string
|
29
|
+
|
30
|
+
String to search for.
|
31
|
+
|
32
|
+
#### prefix
|
33
|
+
|
34
|
+
String used as the prefix for a prefix search.
|
35
|
+
|
16
36
|
### check whether a line identical to the passed string exists
|
17
37
|
|
18
38
|
```ruby
|
19
|
-
FileBsearch.exist?(path, string)
|
39
|
+
FileBsearch.exist?(path, string, encoding)
|
20
40
|
|
21
|
-
File.bsearch?(path, string)
|
41
|
+
File.bsearch?(path, string, encoding)
|
22
42
|
|
23
43
|
file = open(path)
|
24
|
-
file.bsearch?(string)
|
44
|
+
file.bsearch?(string, encoding)
|
25
45
|
```
|
26
46
|
|
27
47
|
if exists, return true.
|
@@ -30,29 +50,29 @@ if not exists, return false.
|
|
30
50
|
### search position in file for the line that is identical with passed string
|
31
51
|
|
32
52
|
```ruby
|
33
|
-
FileBsearch.index(path, string)
|
53
|
+
FileBsearch.index(path, string, encoding)
|
34
54
|
|
35
|
-
File.bsearch(path, string)
|
55
|
+
File.bsearch(path, string, encoding)
|
36
56
|
|
37
57
|
file = open(path)
|
38
|
-
file.bsearch(string)
|
58
|
+
file.bsearch(string, encoding)
|
39
59
|
```
|
40
60
|
|
41
|
-
if exists, return Integer object that
|
61
|
+
if it exists, returns an Integer object holding the file pointer position of the head of the matched line.
|
42
62
|
if not exists, return false.
|
43
63
|
|
44
64
|
### get lines that start with the passed prefix
|
45
65
|
|
46
66
|
```ruby
|
47
|
-
FileBsearch.get_lines(path, prefix)
|
67
|
+
FileBsearch.get_lines(path, prefix, encoding)
|
48
68
|
|
49
|
-
File.bsearch_lines(path, prefix)
|
69
|
+
File.bsearch_lines(path, prefix, encoding)
|
50
70
|
|
51
71
|
file = open(path)
|
52
|
-
file.bsearch_lines(prefix)
|
72
|
+
file.bsearch_lines(prefix, encoding)
|
53
73
|
```
|
54
74
|
|
55
|
-
if exists, return Array object that
|
75
|
+
if any exist, returns an Array object containing the matched lines.
|
56
76
|
if not exists, return empty Array object.
|
57
77
|
|
58
78
|
## Contributing
|
@@ -65,4 +85,4 @@ if not exists, return empty Array object.
|
|
65
85
|
|
66
86
|
## Author
|
67
87
|
|
68
|
-
[indeep-xyz](http://indeep.xyz/)
|
88
|
+
[indeep-xyz](http://indeep.xyz/) (Japanese language)
|
data/file_bsearch.gemspec
CHANGED
@@ -9,7 +9,9 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.authors = ["indeep-xyz"]
|
10
10
|
spec.email = ["indeep.xyz@gmail.com"]
|
11
11
|
spec.summary = %q{binary search for sorted text file.}
|
12
|
-
spec.description =
|
12
|
+
spec.description = <<EOS
|
13
|
+
binary search for sorted text file. it is effective when file size is bigger.
|
14
|
+
EOS
|
13
15
|
spec.homepage = "https://github.com/indeep-xyz/ruby-file-bsearch/"
|
14
16
|
spec.license = "MIT"
|
15
17
|
|
@@ -18,6 +20,8 @@ Gem::Specification.new do |spec|
|
|
18
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
21
|
spec.require_paths = ["lib"]
|
20
22
|
|
23
|
+
spec.add_runtime_dependency "file_char_licker", "~> 0.5"
|
24
|
+
|
21
25
|
spec.add_development_dependency "bundler", "~> 1.7"
|
22
26
|
spec.add_development_dependency "rake", "~> 10.0"
|
23
27
|
spec.add_development_dependency "rspec"
|
data/lib/file_bsearch/common.rb
CHANGED
@@ -1,87 +1,9 @@
|
|
1
|
-
|
2
|
-
class << self
|
3
|
-
|
4
|
-
private
|
5
|
-
|
6
|
-
# get backward lines
|
7
|
-
#
|
8
|
-
# args
|
9
|
-
# file ... File object
|
10
|
-
# #pos value should be at SOL (Start Of Line)
|
11
|
-
# size ... indication of reading size
|
12
|
-
#
|
13
|
-
# returner
|
14
|
-
# String object as lines
|
15
|
-
def backward_lines(file, size = 512)
|
16
|
-
|
17
|
-
result = ""
|
18
|
-
|
19
|
-
while file.pos > 0
|
20
|
-
|
21
|
-
file.seek(-1, IO::SEEK_CUR)
|
22
|
-
char = file.getc
|
23
|
-
|
24
|
-
if char.match(/[\r\n]/) && result.size > size
|
25
|
-
break
|
26
|
-
else
|
27
|
-
result.insert(0, char)
|
28
|
-
file.seek(-1, IO::SEEK_CUR)
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
result
|
33
|
-
end
|
34
|
-
|
35
|
-
# get a line string at current position
|
36
|
-
#
|
37
|
-
# args
|
38
|
-
# file ... File object
|
39
|
-
def current_line(file)
|
40
|
-
|
41
|
-
if file.pos > 0
|
42
|
-
|
43
|
-
# move pointer to before character
|
44
|
-
file.seek(-1, IO::SEEK_CUR)
|
1
|
+
# coding: utf-8
|
45
2
|
|
46
|
-
|
47
|
-
# - move pointer until reach to EOL of before line.
|
48
|
-
until file.getc.match(/[\n\r]/)
|
3
|
+
require "file_char_licker"
|
49
4
|
|
50
|
-
|
51
|
-
|
52
|
-
file.seek(-2, IO::SEEK_CUR)
|
53
|
-
else
|
54
|
-
|
55
|
-
# if EOS, break
|
56
|
-
file.rewind
|
57
|
-
break
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
file.gets
|
63
|
-
end
|
64
|
-
|
65
|
-
# get forward lines
|
66
|
-
#
|
67
|
-
# args
|
68
|
-
# file ... File object
|
69
|
-
# #pos value should be at SOL (Start Of Line)
|
70
|
-
# size ... indication of reading size
|
71
|
-
#
|
72
|
-
# returner
|
73
|
-
# String object as lines
|
74
|
-
def forward_lines(file, size = 512)
|
75
|
-
|
76
|
-
result = ""
|
77
|
-
|
78
|
-
while result.size < size && !file.eof?
|
79
|
-
|
80
|
-
result += file.gets
|
81
|
-
end
|
82
|
-
|
83
|
-
result
|
84
|
-
end
|
5
|
+
module FileBsearch
|
6
|
+
class << self
|
85
7
|
|
86
8
|
# args
|
87
9
|
# file ... File object
|
@@ -107,7 +29,7 @@ module FileBsearch
|
|
107
29
|
# - if EOF, not found
|
108
30
|
# - if pointer did not move, not found
|
109
31
|
while max > file.pos \
|
110
|
-
&& (line = current_line
|
32
|
+
&& (line = file.current_line) \
|
111
33
|
&& file.pos != old_pos
|
112
34
|
|
113
35
|
old_pos = file.pos
|
@@ -115,8 +37,8 @@ module FileBsearch
|
|
115
37
|
|
116
38
|
# for debug
|
117
39
|
# p "-- #{needle}, " + {
|
118
|
-
# code: code,
|
119
40
|
# text: line.chomp,
|
41
|
+
# code: code,
|
120
42
|
# min: min,
|
121
43
|
# max: max,
|
122
44
|
# pos: file.pos
|
@@ -128,8 +50,8 @@ module FileBsearch
|
|
128
50
|
# - if match, return
|
129
51
|
case code
|
130
52
|
when -1 then min = file.pos - 1
|
131
|
-
when 1 then max = file.pos - line.
|
132
|
-
else return file.pos - line.
|
53
|
+
when 1 then max = file.pos - line.bytesize
|
54
|
+
else return file.pos - line.bytesize
|
133
55
|
end
|
134
56
|
|
135
57
|
# move to mid point
|
@@ -140,16 +62,18 @@ module FileBsearch
|
|
140
62
|
end
|
141
63
|
|
142
64
|
# args
|
143
|
-
#
|
65
|
+
# file ... File object || path for String object
|
144
66
|
#
|
145
67
|
# returner
|
146
68
|
# File object
|
147
|
-
def
|
69
|
+
def init_file(file, encoding)
|
148
70
|
|
149
|
-
# check the
|
71
|
+
# check the file argument
|
150
72
|
# - if not File object, open it as String for file path
|
151
|
-
|
152
|
-
|
73
|
+
file = File.open(file.to_s) unless file.is_a?(File)
|
74
|
+
FileCharLicker.attach(file, encoding)
|
75
|
+
|
76
|
+
file
|
153
77
|
end
|
154
78
|
end
|
155
79
|
end
|
@@ -1,22 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
module FileBsearch
|
2
4
|
class << self
|
3
5
|
|
4
6
|
# returner
|
5
7
|
# Array object
|
6
|
-
def get_lines(
|
8
|
+
def get_lines(file, prefix, encoding = nil, *args)
|
7
9
|
|
8
|
-
|
9
|
-
|
10
|
+
file = init_file(file, encoding)
|
11
|
+
prefix = prefix.to_s
|
10
12
|
|
11
13
|
# pass to scan method
|
12
|
-
pos =
|
13
|
-
line[0, prefix.size] <=> prefix
|
14
|
-
end
|
14
|
+
pos = scan_around_lines_seed(file, prefix, *args)
|
15
15
|
|
16
16
|
if pos.is_a?(Integer)
|
17
17
|
|
18
|
-
|
19
|
-
|
18
|
+
file.seek(pos)
|
19
|
+
lines = file.around_lines(/^#{Regexp.escape(prefix)}/)
|
20
|
+
result = lines.chomp.split(/[\r\n]+/) if lines.bytesize > 0
|
20
21
|
end
|
21
22
|
|
22
23
|
result || []
|
@@ -24,124 +25,10 @@ module FileBsearch
|
|
24
25
|
|
25
26
|
private
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
# file ... File object
|
31
|
-
# pos ... starting point for file#pos
|
32
|
-
# require to be within contiguous range
|
33
|
-
# needle ... RegExp object for contiguous check
|
34
|
-
#
|
35
|
-
# returner
|
36
|
-
# Array object
|
37
|
-
def around_lines(file, pos, needle)
|
38
|
-
|
39
|
-
# scan
|
40
|
-
min = scan_contiguous_min(file, pos, needle) || pos
|
41
|
-
max = scan_contiguous_max(file, pos, needle) || pos
|
42
|
-
|
43
|
-
# read
|
44
|
-
file.seek(min)
|
45
|
-
lines = file.read(max - min).chomp
|
46
|
-
|
47
|
-
# return
|
48
|
-
return lines.split(/[\r\n]+/) if lines.size > 0
|
49
|
-
[]
|
50
|
-
end
|
51
|
-
|
52
|
-
# scan min file#pos of contiguous range.
|
53
|
-
#
|
54
|
-
# args
|
55
|
-
# file ... File object
|
56
|
-
# pos ... starting point for file#pos
|
57
|
-
# require to be within contiguous range
|
58
|
-
# needle ... RegExp object for contiguous check
|
59
|
-
# step ... buffer size for check processing
|
60
|
-
#
|
61
|
-
# returner
|
62
|
-
# Integer object for file#pos
|
63
|
-
# EOS of matched line
|
64
|
-
def scan_contiguous_min(file, pos, needle, step = 512)
|
65
|
-
|
66
|
-
file.pos = pos
|
67
|
-
min = nil
|
68
|
-
|
69
|
-
loop do
|
70
|
-
|
71
|
-
lines = backward_lines(file, step)
|
72
|
-
lines_pos = lines.index(needle)
|
73
|
-
file_pos = file.pos
|
74
|
-
|
75
|
-
# for debug
|
76
|
-
# p [
|
77
|
-
# lines: lines,
|
78
|
-
# lines_pos: lines_pos,
|
79
|
-
# file_pos: file_pos
|
80
|
-
# ].to_s
|
81
|
-
# sleep 0.05
|
82
|
-
|
83
|
-
if lines_pos.nil?
|
84
|
-
break
|
85
|
-
else
|
86
|
-
min = file_pos + lines_pos
|
87
|
-
break if lines_pos > 0 || file_pos < 1
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
min
|
92
|
-
end
|
93
|
-
|
94
|
-
# scan max file#pos of contiguous range.
|
95
|
-
#
|
96
|
-
# args
|
97
|
-
# file ... File object
|
98
|
-
# pos ... starting point for file#pos
|
99
|
-
# require to be within contiguous range
|
100
|
-
# needle ... RegExp object for contiguous check
|
101
|
-
# step ... buffer size for check processing
|
102
|
-
#
|
103
|
-
# returner
|
104
|
-
# Integer object for file#pos
|
105
|
-
# EOL of matched line
|
106
|
-
def scan_contiguous_max(file, pos, needle, step = 512)
|
107
|
-
|
108
|
-
file.pos = pos
|
109
|
-
max = nil
|
110
|
-
|
111
|
-
loop do
|
112
|
-
|
113
|
-
# file#pos before #forward_lines
|
114
|
-
pos_old = file.pos
|
115
|
-
|
116
|
-
lines = forward_lines(file, step)
|
117
|
-
lines_pos = lines.rindex(needle)
|
118
|
-
|
119
|
-
# for debug
|
120
|
-
# p [
|
121
|
-
# lines: lines,
|
122
|
-
# lines_pos: lines_pos,
|
123
|
-
# file_pos: file.pos
|
124
|
-
# ].to_s
|
125
|
-
# sleep 0.05
|
126
|
-
|
127
|
-
# if did not match needle
|
128
|
-
# - returner is last set value to 'max'
|
129
|
-
break if lines_pos.nil?
|
130
|
-
|
131
|
-
lines_end_pos = lines.index(/([\r\n]+?)/, lines_pos)
|
132
|
-
|
133
|
-
if file.eof?
|
134
|
-
max = (lines_end_pos.nil?) ? file.size : pos_old + lines_end_pos
|
135
|
-
break
|
136
|
-
else
|
137
|
-
max = pos_old + lines_end_pos
|
138
|
-
|
139
|
-
break if lines_end_pos < lines.size - 1
|
140
|
-
end
|
141
|
-
|
28
|
+
def scan_around_lines_seed(file, prefix, *args)
|
29
|
+
scan(file, nil, *args) do |line|
|
30
|
+
line[0, prefix.size] <=> prefix
|
142
31
|
end
|
143
|
-
|
144
|
-
max
|
145
32
|
end
|
146
33
|
end
|
147
34
|
end
|
data/lib/file_bsearch/main.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
module FileBsearch
|
2
4
|
class << self
|
3
5
|
|
@@ -14,16 +16,16 @@ module FileBsearch
|
|
14
16
|
end
|
15
17
|
|
16
18
|
# args
|
17
|
-
#
|
19
|
+
# file ... File object || path for String object
|
18
20
|
#
|
19
21
|
# returner
|
20
22
|
# position in target file
|
21
|
-
def index(
|
23
|
+
def index(file, needle, encoding = nil, *args, &block)
|
22
24
|
|
23
|
-
file =
|
25
|
+
file = init_file(file, encoding)
|
24
26
|
|
25
27
|
# pass to scan method
|
26
|
-
scan(file, *args, &block)
|
28
|
+
scan(file, needle, *args, &block)
|
27
29
|
end
|
28
30
|
end
|
29
31
|
end
|
data/lib/file_bsearch/version.rb
CHANGED
data/spec/file_bsearch_spec.rb
CHANGED
@@ -1,16 +1,19 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
require 'spec_helper'
|
2
4
|
|
3
5
|
describe FileBsearch do
|
4
6
|
|
5
|
-
let(:path) { '/tmp/file_bsearch_sample.csv' }
|
6
|
-
let(:correct) { '9' }
|
7
|
-
let(:incorrect) { '!!!!!' }
|
8
|
-
|
9
|
-
let!(:helper) { MySpecHelper.new(path) }
|
10
|
-
|
11
7
|
it 'has a version number' do
|
12
8
|
expect(FileBsearch::VERSION).not_to be nil
|
13
9
|
end
|
10
|
+
end
|
11
|
+
|
12
|
+
describe '<ASCII FILE>' do
|
13
|
+
|
14
|
+
let!(:helper) { MySpecHelper.new }
|
15
|
+
let(:path) { MySpecHelper::ASCII_SAMPLE_PATH }
|
16
|
+
let(:incorrect) { MySpecHelper::INCORRECT }
|
14
17
|
|
15
18
|
describe 'module' do
|
16
19
|
|
@@ -18,7 +21,8 @@ describe FileBsearch do
|
|
18
21
|
|
19
22
|
it 'when a record is exists' do
|
20
23
|
|
21
|
-
|
24
|
+
arg = MySpecHelper::ASCII_ARG
|
25
|
+
result = FileBsearch.exist?(path, arg)
|
22
26
|
|
23
27
|
expect(result).to eq(true)
|
24
28
|
end
|
@@ -26,7 +30,6 @@ describe FileBsearch do
|
|
26
30
|
it 'when a record is not exist' do
|
27
31
|
|
28
32
|
result = FileBsearch.exist?(path, incorrect)
|
29
|
-
|
30
33
|
expect(result).to eq(false)
|
31
34
|
end
|
32
35
|
|
@@ -34,8 +37,8 @@ describe FileBsearch do
|
|
34
37
|
|
35
38
|
File.foreach(path) do |line|
|
36
39
|
|
37
|
-
|
38
|
-
result = FileBsearch.exist?(path,
|
40
|
+
arg = line.chomp
|
41
|
+
result = FileBsearch.exist?(path, arg)
|
39
42
|
|
40
43
|
expect(result).to eq(true)
|
41
44
|
end
|
@@ -46,10 +49,11 @@ describe FileBsearch do
|
|
46
49
|
|
47
50
|
it 'when returner is number as position in the file' do
|
48
51
|
|
49
|
-
|
50
|
-
|
52
|
+
arg = MySpecHelper::ASCII_ARG
|
53
|
+
pos = FileBsearch.index(path, arg)
|
51
54
|
|
52
|
-
|
55
|
+
result = helper.ascii_correct?(pos, arg)
|
56
|
+
expect(result).to eq(true)
|
53
57
|
end
|
54
58
|
|
55
59
|
it 'when returner is false' do
|
@@ -62,15 +66,16 @@ describe FileBsearch do
|
|
62
66
|
describe '#get_lines' do
|
63
67
|
it 'when lines is exist that with the prefix' do
|
64
68
|
|
65
|
-
|
69
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
70
|
+
correct = MySpecHelper::ASCII_LIST_CORRECT
|
66
71
|
|
67
|
-
|
72
|
+
result = FileBsearch.get_lines(path, arg)
|
73
|
+
expect(result).to eq(correct)
|
68
74
|
end
|
69
75
|
|
70
76
|
it 'when lines is not exist that with the prefix' do
|
71
77
|
|
72
|
-
result = FileBsearch.get_lines(path,
|
73
|
-
|
78
|
+
result = FileBsearch.get_lines(path, incorrect)
|
74
79
|
expect(result).to eq([])
|
75
80
|
end
|
76
81
|
end
|
@@ -78,23 +83,25 @@ describe FileBsearch do
|
|
78
83
|
|
79
84
|
describe 'instance method' do
|
80
85
|
|
81
|
-
let(:file)
|
86
|
+
let(:file) { open(path) }
|
82
87
|
|
83
88
|
describe '#bsearch' do
|
84
89
|
|
85
90
|
it 'when returner is number as position in the file' do
|
86
91
|
|
87
|
-
|
88
|
-
|
92
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
93
|
+
pos = file.bsearch(arg)
|
94
|
+
result = helper.ascii_correct?(pos, arg)
|
89
95
|
|
90
|
-
expect(
|
96
|
+
expect(result).to eq(true)
|
91
97
|
end
|
92
98
|
end
|
93
99
|
|
94
100
|
describe '#bsearch?' do
|
95
101
|
it 'when a record is exists' do
|
96
102
|
|
97
|
-
|
103
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
104
|
+
result = file.bsearch?(arg)
|
98
105
|
|
99
106
|
expect(result).to eq(true)
|
100
107
|
end
|
@@ -103,9 +110,11 @@ describe FileBsearch do
|
|
103
110
|
describe '#bsearch_lines' do
|
104
111
|
it 'when lines is exist that with the prefix' do
|
105
112
|
|
106
|
-
|
113
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
114
|
+
correct = MySpecHelper::ASCII_LIST_CORRECT
|
107
115
|
|
108
|
-
|
116
|
+
result = file.bsearch_lines(arg)
|
117
|
+
expect(result).to eq(correct)
|
109
118
|
end
|
110
119
|
end
|
111
120
|
end
|
@@ -116,17 +125,19 @@ describe FileBsearch do
|
|
116
125
|
|
117
126
|
it 'when returner is number as position in the file' do
|
118
127
|
|
119
|
-
|
120
|
-
|
128
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
129
|
+
pos = File.bsearch(path, arg)
|
130
|
+
result = helper.ascii_correct?(pos, arg)
|
121
131
|
|
122
|
-
expect(
|
132
|
+
expect(result).to eq(true)
|
123
133
|
end
|
124
134
|
end
|
125
135
|
|
126
136
|
describe '#bsearch?' do
|
127
137
|
it 'when a record is exists' do
|
128
138
|
|
129
|
-
|
139
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
140
|
+
result = File.bsearch?(path, arg)
|
130
141
|
|
131
142
|
expect(result).to eq(true)
|
132
143
|
end
|
@@ -135,9 +146,80 @@ describe FileBsearch do
|
|
135
146
|
describe '#bsearch_lines' do
|
136
147
|
it 'when lines is exist that with the prefix' do
|
137
148
|
|
138
|
-
|
149
|
+
arg = MySpecHelper::ASCII_LIST_ARG
|
150
|
+
correct = MySpecHelper::ASCII_LIST_CORRECT
|
151
|
+
|
152
|
+
result = File.bsearch_lines(path, arg)
|
153
|
+
expect(result).to eq(correct)
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
describe '<UTF-8 FILE>' do
|
160
|
+
|
161
|
+
let!(:helper) { MySpecHelper.new }
|
162
|
+
let(:path) { MySpecHelper::MULTIBYTE_SAMPLE_PATH }
|
163
|
+
let(:incorrect) { MySpecHelper::INCORRECT }
|
164
|
+
let(:arg) { MySpecHelper::MULTIBYTE_ARG }
|
165
|
+
let(:list_arg) { MySpecHelper::MULTIBYTE_LIST_ARG }
|
166
|
+
|
167
|
+
describe 'module' do
|
168
|
+
|
169
|
+
describe '#exist?' do
|
170
|
+
|
171
|
+
it 'when a record is exists' do
|
172
|
+
|
173
|
+
result = FileBsearch.exist?(path, arg, 'utf-8')
|
174
|
+
expect(result).to eq(true)
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'when a record is not exist' do
|
178
|
+
|
179
|
+
result = FileBsearch.exist?(path, incorrect, 'utf-8')
|
180
|
+
expect(result).to eq(false)
|
181
|
+
end
|
139
182
|
|
140
|
-
|
183
|
+
it 'when all records are exist' do
|
184
|
+
|
185
|
+
File.foreach(path) do |line|
|
186
|
+
|
187
|
+
arg = line.chomp
|
188
|
+
result = FileBsearch.exist?(path, arg, 'utf-8')
|
189
|
+
|
190
|
+
expect(result).to eq(true)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
describe '#index' do
|
196
|
+
|
197
|
+
it 'when returner is number as position in the file' do
|
198
|
+
|
199
|
+
pos = FileBsearch.index(path, arg, 'utf-8')
|
200
|
+
result = helper.multibyte_correct?(pos, arg)
|
201
|
+
expect(result).to eq(true)
|
202
|
+
end
|
203
|
+
|
204
|
+
it 'when returner is false' do
|
205
|
+
|
206
|
+
result = FileBsearch.index(path, incorrect, 'utf-8')
|
207
|
+
expect(result).to eq(false)
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
211
|
+
describe '#get_lines' do
|
212
|
+
it 'when lines is exist that with the prefix' do
|
213
|
+
|
214
|
+
correct = MySpecHelper::MULTIBYTE_LIST_CORRECT
|
215
|
+
result = FileBsearch.get_lines(path, list_arg, 'utf-8')
|
216
|
+
expect(result).to eq(correct)
|
217
|
+
end
|
218
|
+
|
219
|
+
it 'when lines is not exist that with the prefix' do
|
220
|
+
|
221
|
+
result = FileBsearch.get_lines(path, incorrect, 'utf-8')
|
222
|
+
expect(result).to eq([])
|
141
223
|
end
|
142
224
|
end
|
143
225
|
end
|
data/spec/lib/my_spec_helper.rb
CHANGED
@@ -1,32 +1,80 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
1
3
|
class MySpecHelper
|
2
4
|
|
3
5
|
attr_reader :path
|
4
6
|
|
5
|
-
|
7
|
+
ASCII_SAMPLE_PATH = '/tmp/file_bsearch_sample_ascii.csv'
|
8
|
+
MULTIBYTE_SAMPLE_PATH = '/tmp/file_bsearch_sample_multibyte.csv'
|
9
|
+
|
10
|
+
ASCII_ARG = '9'
|
11
|
+
ASCII_LIST_ARG = '1000'
|
12
|
+
ASCII_LIST_CORRECT = %w{1000 10000}
|
13
|
+
INCORRECT = '!!!!!'
|
14
|
+
|
15
|
+
MULTIBYTE_ARG = 'ああ'
|
16
|
+
MULTIBYTE_LIST_ARG = 'あ'
|
17
|
+
MULTIBYTE_LIST_CORRECT = ["あぁ", "ああ", "あぃ", "あい", "あぅ", "あう", "あぇ", "あえ", "あぉ", "あお", "あか", "あが", "あき", "あぎ", "あく", "あぐ", "あけ", "あげ", "あこ", "あご", "あさ", "あざ", "あし", "あじ", "あす", "あず", "あせ", "あぜ", "あそ", "あぞ", "あた", "あだ", "あち", "あぢ", "あっ", "あつ", "あづ", "あて", "あで", "あと", "あど", "あな", "あに", "あぬ", "あね", "あの", "あは", "あば", "あぱ", "あひ", "あび", "あぴ", "あふ", "あぶ", "あぷ", "あへ", "あべ", "あぺ", "あほ", "あぼ", "あぽ", "あま", "あみ", "あむ", "あめ", "あも", "あゃ", "あや", "あゅ", "あゆ", "あょ", "あよ", "あら", "あり", "ある", "あれ", "あろ", "あゎ", "あわ", "あゐ", "あゑ", "あを", "あん", "あゔ", "あゕ", "あゖ"]
|
6
18
|
|
7
|
-
|
19
|
+
def initialize
|
8
20
|
|
9
|
-
|
21
|
+
init_ascii_sample
|
22
|
+
init_multibyte_sample
|
10
23
|
end
|
11
24
|
|
12
|
-
def
|
25
|
+
def init_ascii_sample
|
26
|
+
|
27
|
+
path = ASCII_SAMPLE_PATH
|
13
28
|
|
14
29
|
# guard
|
15
30
|
# - if the file already exists, do not recreate it
|
16
|
-
return nil if File.exist?(
|
31
|
+
return nil if File.exist?(path)
|
17
32
|
|
18
33
|
# create sorted string
|
19
|
-
str = (0..10_000)
|
34
|
+
str = create_lines(0..10_000)
|
20
35
|
|
21
|
-
IO.write(
|
36
|
+
IO.write(path, str)
|
22
37
|
end
|
23
38
|
|
24
|
-
|
39
|
+
def init_multibyte_sample
|
40
|
+
|
41
|
+
path = MULTIBYTE_SAMPLE_PATH
|
42
|
+
|
43
|
+
# guard
|
44
|
+
# - if the file already exists, do not recreate it
|
45
|
+
return nil if File.exist?(path)
|
46
|
+
|
47
|
+
# create sorted string
|
48
|
+
str = create_lines('ぁぁ'..'んん')
|
49
|
+
|
50
|
+
IO.write(path, str)
|
51
|
+
end
|
52
|
+
|
53
|
+
# check correct for ascii file
|
54
|
+
#
|
55
|
+
# args
|
56
|
+
# pos ... Integer object for position in file
|
57
|
+
# (returner of FileBsearch#index)
|
58
|
+
# str ... String object for comparison
|
59
|
+
def ascii_correct?(pos, str)
|
60
|
+
str == IO.read(ASCII_SAMPLE_PATH, str.bytesize, pos)
|
61
|
+
end
|
62
|
+
|
63
|
+
# check correct for multibyte file
|
25
64
|
#
|
26
65
|
# args
|
27
|
-
#
|
28
|
-
#
|
29
|
-
|
30
|
-
|
66
|
+
# pos ... Integer object for position in file
|
67
|
+
# (returner of FileBsearch#index)
|
68
|
+
# str ... String object for comparison
|
69
|
+
def multibyte_correct?(pos, str)
|
70
|
+
|
71
|
+
read_str = IO.read(MULTIBYTE_SAMPLE_PATH, str.bytesize, pos)
|
72
|
+
str == read_str.toutf8
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def create_lines(range)
|
78
|
+
range.to_a.sort { |a, b| a.to_s <=> b.to_s }.join("\n")
|
31
79
|
end
|
32
80
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: file_bsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- indeep-xyz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: file_char_licker
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.5'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,8 +80,8 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: '0'
|
69
|
-
description:
|
70
|
-
bigger.
|
83
|
+
description: |
|
84
|
+
binary search for sorted text file. it is effective when file size is bigger.
|
71
85
|
email:
|
72
86
|
- indeep.xyz@gmail.com
|
73
87
|
executables: []
|