textrepo 0.4.5 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/textrepo/error.rb +26 -13
- data/lib/textrepo/file_system_repository.rb +194 -2
- data/lib/textrepo/repository.rb +19 -0
- data/lib/textrepo/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc5cf6089b4883c93dc228e19aa0a149d71a8d9cc26a92e6c75fdc4ee0b2d694
|
4
|
+
data.tar.gz: 8ebecace02d486b6b6c12256d52adfd44963a3a14f6d39729b83d426bce3a26e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aef27bf0363a66eeb1676ccda0682c253155875a8d0c16b27189674836edf2563b8df87b7f3b56cdb5d02c895f724f47a956588a5d97d24d83cdddce76fc083e
|
7
|
+
data.tar.gz: 81a4f8a76eb0ec8545e29ddbd537d49ec00f6009f5617dad61c61d8a5552c6df090d6d37fd507a7976c5f5f60b7d0c8b43f9a99d98d7efa1e933828270711d87
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
|
|
7
7
|
## [Unreleased]
|
8
8
|
Nothing to record here.
|
9
9
|
|
10
|
+
## [0.5.0] - 2020-11-01
|
11
|
+
### Added
|
12
|
+
- Add a new API `Repository#search`.
|
13
|
+
- Add a new API `Repository#exist?`. (0.4.3)
|
14
|
+
|
10
15
|
## [0.4.0] - 2020-10-14
|
11
16
|
### Added
|
12
17
|
- Released to rubygems.org.
|
data/lib/textrepo/error.rb
CHANGED
@@ -2,19 +2,21 @@ module Textrepo
|
|
2
2
|
|
3
3
|
##
|
4
4
|
# Following errors might occur in repository operations:
|
5
|
-
#
|
6
|
-
# | operation (args)
|
7
|
-
#
|
8
|
-
# | create (timestamp, text)
|
9
|
-
# |
|
10
|
-
#
|
11
|
-
# | read (timestamp)
|
12
|
-
#
|
13
|
-
# | update (timestamp, text)
|
14
|
-
# |
|
15
|
-
#
|
16
|
-
# | delete (timestamp)
|
17
|
-
#
|
5
|
+
# +---------------------------------+-----------------------+
|
6
|
+
# | operation (args) | error type |
|
7
|
+
# +---------------------------------+-----------------------+
|
8
|
+
# | create (timestamp, text) | Duplicate timestamp |
|
9
|
+
# | | Empty text |
|
10
|
+
# +---------------------------------+-----------------------+
|
11
|
+
# | read (timestamp) | Missing timestamp |
|
12
|
+
# +---------------------------------+-----------------------+
|
13
|
+
# | update (timestamp, text) | Missing timestamp |
|
14
|
+
# | | Empty text |
|
15
|
+
# +---------------------------------+-----------------------+
|
16
|
+
# | delete (timestamp) | Missing timestamp |
|
17
|
+
# +---------------------------------+-----------------------+
|
18
|
+
# | search (pattern, stamp_pattern) | Invalid search result |
|
19
|
+
# +---------------------------------+-----------------------+
|
18
20
|
|
19
21
|
class Error < StandardError; end
|
20
22
|
|
@@ -25,6 +27,7 @@ module Textrepo
|
|
25
27
|
EMPTY_TEXT = 'empty text'
|
26
28
|
MISSING_TIMESTAMP = 'missing timestamp: %s'
|
27
29
|
INVALID_TIMESTAMP_STRING = "invalid string as timestamp: %s"
|
30
|
+
INVALID_SEARCH_RESULT = "invalid result by searcher: %s"
|
28
31
|
end
|
29
32
|
# :startdoc:
|
30
33
|
|
@@ -77,4 +80,14 @@ module Textrepo
|
|
77
80
|
end
|
78
81
|
end
|
79
82
|
|
83
|
+
##
# An error raised when the output of the searcher program cannot be
# used as a search result.  The given string is embedded into
# ErrMsg::INVALID_SEARCH_RESULT to build the error message.

class InvalidSearchResultError < Error
  def initialize(str)
    message = ErrMsg::INVALID_SEARCH_RESULT % str
    super(message)
  end
end
|
92
|
+
|
80
93
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require "open3"
|
2
3
|
|
3
4
|
module Textrepo
|
4
5
|
|
@@ -19,6 +20,16 @@ module Textrepo
|
|
19
20
|
|
20
21
|
attr_reader :extname
|
21
22
|
|
23
|
+
##
|
24
|
+
# Searcher program name.
|
25
|
+
|
26
|
+
attr_reader :searcher
|
27
|
+
|
28
|
+
##
|
29
|
+
# An array of options to pass to the searcher program.
|
30
|
+
|
31
|
+
attr_reader :searcher_options
|
32
|
+
|
22
33
|
##
|
23
34
|
# Default name for the repository which is used when no name is
|
24
35
|
# specified in the configuration settings.
|
@@ -31,6 +42,11 @@ module Textrepo
|
|
31
42
|
|
32
43
|
FAVORITE_EXTNAME = 'md'
|
33
44
|
|
45
|
+
##
|
46
|
+
# Default searcher program to search text in the repository.
|
47
|
+
|
48
|
+
FAVORITE_SEARCHER = 'grep'
|
49
|
+
|
34
50
|
##
|
35
51
|
# Creates a new repository object. The argument, `conf` must be a
|
36
52
|
# Hash object. It should hold the following values:
|
@@ -41,15 +57,32 @@ module Textrepo
|
|
41
57
|
# - OPTIONAL: (if not specified, default values are used)
|
42
58
|
# - :repository_name => basename of the root path for the repository
|
43
59
|
# - :default_extname => extname for a file stored in the repository
|
60
|
+
# - :searcher => a program to search like `grep`
|
61
|
+
# - :searcher_options => an Array of options to pass to the searcher
|
44
62
|
#
|
45
63
|
# The root path of the repository looks like the following:
|
46
64
|
# - conf[:repository_base]/conf[:repository_name]
|
47
65
|
#
|
48
|
-
# Default values are set when
|
66
|
+
# Default values are set when `:repository_name` and `:default_extname`
|
49
67
|
# were not defined in `conf`.
|
50
68
|
#
|
69
|
+
# Be careful when setting `:searcher_options`: it must make the
|
70
|
+
# searcher behave like `grep` with "-inR". The default
|
71
|
+
# value for the searcher options is defined for BSD grep (default
|
72
|
+
# grep on macOS), GNU grep, and ripgrep (aka rg). They are:
|
73
|
+
#
|
74
|
+
# "grep" => ["-i", "-n", "-R", "-E"]
|
75
|
+
# "egrep" => ["-i", "-n", "-R"]
|
76
|
+
# "ggrep" => ["-i", "-n", "-R", "-E"]
|
77
|
+
# "gegrep" => ["-i", "-n", "-R"]
|
78
|
+
# "rg" => ["-S", "-n", "--no-heading", "--color", "never"]
|
79
|
+
#
|
80
|
+
# If you use one of those 3 searchers, it is not recommended to set
|
81
|
+
# `:searcher_options`. The default value works well in
|
82
|
+
# `textrepo`.
|
83
|
+
#
|
51
84
|
# :call-seq:
|
52
|
-
# new(
|
85
|
+
# new(Hash or Hash like object) -> FileSystemRepository
|
53
86
|
|
54
87
|
def initialize(conf)
|
55
88
|
super
|
@@ -58,6 +91,8 @@ module Textrepo
|
|
58
91
|
@path = File.expand_path("#{name}", base)
|
59
92
|
FileUtils.mkdir_p(@path)
|
60
93
|
@extname = conf[:default_extname] || FAVORITE_EXTNAME
|
94
|
+
@searcher = find_searcher(conf[:searcher])
|
95
|
+
@searcher_options = conf[:searcher_options]
|
61
96
|
end
|
62
97
|
|
63
98
|
##
|
@@ -179,6 +214,27 @@ module Textrepo
|
|
179
214
|
FileTest.exist?(abspath(timestamp))
|
180
215
|
end
|
181
216
|
|
217
|
+
##
# Searches a pattern in all text.  The given pattern is a word to
# search or a regular expression; it is handed to the searcher
# program unchanged.  When a timestamp pattern is given, only the
# entries selected by it are searched.
#
# See the document for Textrepo::Repository#search to know about
# the search result.
#
# :call-seq:
#     search(String for pattern, String for Timestamp pattern) -> Array

def search(pattern, stamp_pattern = nil)
  raw_lines =
    if stamp_pattern.nil?
      invoke_searcher_at_repo_root(@searcher, pattern)
    else
      targets = entries(stamp_pattern)
      invoke_searcher_for_entries(@searcher, pattern, targets)
    end
  construct_search_result(raw_lines)
end
|
237
|
+
|
182
238
|
# :stopdoc:
|
183
239
|
|
184
240
|
private
|
@@ -219,6 +275,142 @@ module Textrepo
|
|
219
275
|
}.compact
|
220
276
|
end
|
221
277
|
|
278
|
+
##
|
279
|
+
# The upper limit of files to search at one time. The value was
|
280
|
+
# chosen without a particular reason; it seems neither too many nor too
|
281
|
+
# few files to handle in one search process.
|
282
|
+
|
283
|
+
LIMIT_OF_FILES = 20
|
284
|
+
|
285
|
+
##
# Used when no timestamp pattern was given.  Runs the searcher once
# with the repository root path as its target, relying on the
# recursive searching option, so the whole search is done in a
# single process.
#
# Returns the output lines (line ends removed) as an Array.  The
# Array is empty when the searcher exits unsuccessfully or prints
# nothing.

def invoke_searcher_at_repo_root(searcher, pattern)
  argv = [searcher, *find_searcher_options(searcher), pattern, @path]
  stdout, status = Open3.capture2(*argv)
  return [] unless status.success?
  return [] if stdout.empty?

  stdout.lines.map(&:chomp)
end
|
297
|
+
|
298
|
+
##
# Used when a timestamp pattern was given.  Runs the searcher
# against the files of the given entries and returns its output as
# an Array of lines (line ends removed).
#
# Since the number of target files may be large, they are passed
# to the searcher in groups of at most LIMIT_OF_FILES files, one
# searcher process per group.
#
# One more thing to mention: a searcher like `grep` does not add
# the filename at the beginning of a result line when the target
# is only one file.  For such a group the filename is prepended
# here, so that every returned line has the form
# "<pathname>:<line number>:<text>".
#
# Returns an empty Array when no entries are given, or when the
# searcher exits unsuccessfully or prints nothing.

def invoke_searcher_for_entries(searcher, pattern, entries)
  # Without any file operand the searcher would look at something
  # other than the requested entries; there is nothing to search.
  return [] if entries.empty?

  options = find_searcher_options(searcher)
  entries.each_slice(LIMIT_OF_FILES).flat_map { |group|
    files = find_files(group)
    stdout, status = Open3.capture2(searcher, *options, pattern, *files)
    next [] unless status.success? && !stdout.empty?

    lines = stdout.lines.map(&:chomp)
    if files.size == 1
      # add filename at the beginning of the search result line
      lines.map { |line| [files[0], line].join(":") }
    else
      lines
    end
  }
end
|
343
|
+
|
344
|
+
# Default options for the known searcher programs, keyed by the
# basename of the program.
SEARCHER_OPTS = {
  # case insensitive, print line number, recursive search, work as egrep
  "grep"   => ["-i", "-n", "-R", "-E"],
  # case insensitive, print line number, recursive search
  "egrep"  => ["-i", "-n", "-R"],
  # case insensitive, print line number, recursive search, work as gegrep
  "ggrep"  => ["-i", "-n", "-R", "-E"],
  # case insensitive, print line number, recursive search
  "gegrep" => ["-i", "-n", "-R"],
  # smart case, print line number, no color
  "rg"     => ["-S", "-n", "--no-heading", "--color", "never"],
}.freeze

# Returns the options to pass to the searcher program.
#
# The options given in the configuration (@searcher_options) take
# precedence; otherwise the defaults for the basename of `searcher`
# are looked up in SEARCHER_OPTS.  For an unknown searcher an empty
# Array is returned, so that splatting the result into an argument
# list adds no argument at all (a String would be splatted into one
# empty argument).
def find_searcher_options(searcher)
  @searcher_options || SEARCHER_OPTS[File.basename(searcher)] || []
end
|
360
|
+
|
361
|
+
# Maps every given timestamp through `abspath` and returns the
# results as an Array, in the same order.
def find_files(timestamps)
  timestamps.each_with_object([]) { |ts, paths| paths << abspath(ts) }
end
|
364
|
+
|
365
|
+
##
# Converts the searcher output into the search result.
#
# The argument must be an Array that contains the searcher output.
# Each item is constructed from 3 parts:
#
#     "<pathname>:<integer>:<text>"
#
# For example, it may look like:
#
#     "/somewhere/2020/11/20201101044300.md:18:foo is foo"
#
# Or it may contain more ":" in the text part as:
#
#     "/somewhere/2020/11/20201101044500.md:119:apple:orange:grape"
#
# Therefore a line is split into at most 3 fields; everything after
# the second ":" is kept untouched as the text part (including a
# trailing ":").
#
# Returns an Array whose items are [timestamp, line number, text].
# Raises InvalidSearchResultError when the pathname part of a line
# cannot be converted into a timestamp.

def construct_search_result(output)
  output.map { |line|
    pathname, num, text = line.split(":", 3)
    begin
      [Timestamp.parse_s(timestamp_str(pathname)), num.to_i, text.to_s]
    rescue InvalidTimestampStringError, TypeError
      # @searcher_options is nil unless it was given in the
      # configuration; splatting nil adds nothing to the message.
      raise InvalidSearchResultError, [@searcher, *@searcher_options].join(" ")
    end
  }
end
|
397
|
+
|
398
|
+
# Finds the searcher program to use.
#
# The candidates are the given program (if any) followed by
# FAVORITE_SEARCHER.  Each candidate is looked up in the directories
# listed in the PATH environment variable, and the path of the first
# candidate that is found is returned.  So, when the given program
# is not available, the default searcher is used as a fallback.
#
# Returns nil when none of the candidates is found.
def find_searcher(program = nil)
  candidates = [program, FAVORITE_SEARCHER].compact.uniq
  # PATH may be unset; treat it as an empty list of directories.
  search_paths = ENV.fetch("PATH", "").split(File::PATH_SEPARATOR)
  candidates.each { |prog|
    found = find_in_paths(prog, search_paths)
    return found if found
  }
  nil
end
|
406
|
+
|
407
|
+
# Looks for an executable file named `prog` in the given
# directories, in order.  Returns the expanded path of the first
# one found, or nil when there is none.
#
# A directory that happens to have the same name as the program is
# skipped: directories usually pass the executable test, but cannot
# be run as a searcher.
def find_in_paths(prog, paths)
  paths.each { |dir|
    candidate = File.expand_path(prog, dir)
    return candidate if File.file?(candidate) && File.executable?(candidate)
  }
  nil
end
|
222
414
|
# :startdoc:
|
223
415
|
|
224
416
|
end
|
data/lib/textrepo/repository.rb
CHANGED
@@ -92,6 +92,25 @@ module Textrepo
|
|
92
92
|
# exist?(Timestamp) -> true or false
|
93
93
|
|
94
94
|
def exist?(timestamp)
  # Default implementation: always answers false.
  false
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# Searches a pattern (word or regular expression) in texts that
|
98
|
+
# match a given timestamp pattern. Returns an Array of
|
99
|
+
# search results. If no match, returns an empty Array.
|
100
|
+
#
|
101
|
+
# See the document for Repository#entries about a timestamp
|
102
|
+
# pattern. When nil is passed as a timestamp pattern, searching
|
103
|
+
# applies to all text in the repository.
|
104
|
+
#
|
105
|
+
# Each entry of the result Array is constructed from 3 items, (1)
|
106
|
+
# timestamp (Timestamp), (2) line number (Integer), (3) matched
|
107
|
+
# line (String).
|
108
|
+
#
|
109
|
+
# :call-seq:
|
110
|
+
# search(String for pattern, String for Timestamp pattern) -> Array
|
111
|
+
|
112
|
+
def search(pattern, stamp_pattern = nil)
  # Default implementation: no match, whatever the arguments are.
  []
end
|
113
|
+
|
95
114
|
end
|
96
115
|
|
97
116
|
require_relative 'file_system_repository'
|
data/lib/textrepo/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textrepo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mnbi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|