textrepo 0.4.5 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/textrepo/error.rb +26 -13
- data/lib/textrepo/file_system_repository.rb +194 -2
- data/lib/textrepo/repository.rb +19 -0
- data/lib/textrepo/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dc5cf6089b4883c93dc228e19aa0a149d71a8d9cc26a92e6c75fdc4ee0b2d694
|
4
|
+
data.tar.gz: 8ebecace02d486b6b6c12256d52adfd44963a3a14f6d39729b83d426bce3a26e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aef27bf0363a66eeb1676ccda0682c253155875a8d0c16b27189674836edf2563b8df87b7f3b56cdb5d02c895f724f47a956588a5d97d24d83cdddce76fc083e
|
7
|
+
data.tar.gz: 81a4f8a76eb0ec8545e29ddbd537d49ec00f6009f5617dad61c61d8a5552c6df090d6d37fd507a7976c5f5f60b7d0c8b43f9a99d98d7efa1e933828270711d87
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
|
|
7
7
|
## [Unreleased]
|
8
8
|
Nothing to record here.
|
9
9
|
|
10
|
+
## [0.5.0] - 2020-11-01
|
11
|
+
### Added
|
12
|
+
- Add a new API `Repository#search`.
|
13
|
+
- Add a new API `Repository#exist?`. (0.4.3)
|
14
|
+
|
10
15
|
## [0.4.0] - 2020-10-14
|
11
16
|
### Added
|
12
17
|
- Released to rubygems.org.
|
data/lib/textrepo/error.rb
CHANGED
@@ -2,19 +2,21 @@ module Textrepo
|
|
2
2
|
|
3
3
|
##
|
4
4
|
# Following errors might occur in repository operations:
|
5
|
-
#
|
6
|
-
# | operation (args)
|
7
|
-
#
|
8
|
-
# | create (timestamp, text)
|
9
|
-
# |
|
10
|
-
#
|
11
|
-
# | read (timestamp)
|
12
|
-
#
|
13
|
-
# | update (timestamp, text)
|
14
|
-
# |
|
15
|
-
#
|
16
|
-
# | delete (timestamp)
|
17
|
-
#
|
5
|
+
# +---------------------------------+-----------------------+
|
6
|
+
# | operation (args) | error type |
|
7
|
+
# +---------------------------------+-----------------------+
|
8
|
+
# | create (timestamp, text) | Duplicate timestamp |
|
9
|
+
# | | Empty text |
|
10
|
+
# +---------------------------------+-----------------------+
|
11
|
+
# | read (timestamp) | Missing timestamp |
|
12
|
+
# +---------------------------------+-----------------------+
|
13
|
+
# | update (timestamp, text) | Missing timestamp |
|
14
|
+
# | | Empty text |
|
15
|
+
# +---------------------------------+-----------------------+
|
16
|
+
# | delete (timestamp) | Missing timestamp |
|
17
|
+
# +---------------------------------+-----------------------+
|
18
|
+
# | search (pattern, stamp_pattern) | Invalid search result |
|
19
|
+
# +---------------------------------+-----------------------+
|
18
20
|
|
19
21
|
class Error < StandardError; end
|
20
22
|
|
@@ -25,6 +27,7 @@ module Textrepo
|
|
25
27
|
EMPTY_TEXT = 'empty text'
|
26
28
|
MISSING_TIMESTAMP = 'missing timestamp: %s'
|
27
29
|
INVALID_TIMESTAMP_STRING = "invalid string as timestamp: %s"
|
30
|
+
INVALID_SEARCH_RESULT = "invalid result by searcher: %s"
|
28
31
|
end
|
29
32
|
# :startdoc:
|
30
33
|
|
@@ -77,4 +80,14 @@ module Textrepo
|
|
77
80
|
end
|
78
81
|
end
|
79
82
|
|
83
|
+
##
|
84
|
+
# An error raised when the search result is not suitable to use.
|
85
|
+
#
|
86
|
+
|
87
|
+
# Error raised with a message built from the
# ErrMsg::INVALID_SEARCH_RESULT template.
class InvalidSearchResultError < Error
  # +str+ is substituted into the template's "%s" placeholder.
  def initialize(str)
    message = ErrMsg::INVALID_SEARCH_RESULT % str
    super(message)
  end
end
|
92
|
+
|
80
93
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'fileutils'
|
2
|
+
require "open3"
|
2
3
|
|
3
4
|
module Textrepo
|
4
5
|
|
@@ -19,6 +20,16 @@ module Textrepo
|
|
19
20
|
|
20
21
|
attr_reader :extname
|
21
22
|
|
23
|
+
##
|
24
|
+
# Searcher program name.
|
25
|
+
|
26
|
+
attr_reader :searcher
|
27
|
+
|
28
|
+
##
|
29
|
+
# An array of options to pass to the searcher program.
|
30
|
+
|
31
|
+
attr_reader :searcher_options
|
32
|
+
|
22
33
|
##
|
23
34
|
# Default name for the repository which uses when no name is
|
24
35
|
# specified in the configuration settings.
|
@@ -31,6 +42,11 @@ module Textrepo
|
|
31
42
|
|
32
43
|
FAVORITE_EXTNAME = 'md'
|
33
44
|
|
45
|
+
##
|
46
|
+
# Default searcher program to search text in the repository.
|
47
|
+
|
48
|
+
FAVORITE_SEARCHER = 'grep'
|
49
|
+
|
34
50
|
##
|
35
51
|
# Creates a new repository object. The argument, `conf` must be a
|
36
52
|
# Hash object. It should hold the following values:
|
@@ -41,15 +57,32 @@ module Textrepo
|
|
41
57
|
# - OPTIONAL: (if not specified, default values are used)
|
42
58
|
# - :repository_name => basename of the root path for the repository
|
43
59
|
# - :default_extname => extname for a file stored into in the repository
|
60
|
+
# - :searcher => a program to search like `grep`
|
61
|
+
# - :searcher_options => an Array of options to pass to the searcher
|
44
62
|
#
|
45
63
|
# The root path of the repository looks like the following:
|
46
64
|
# - conf[:repository_base]/conf[:repository_name]
|
47
65
|
#
|
48
|
-
# Default values are set when
|
66
|
+
# Default values are set when `:repository_name` and `:default_extname`
|
49
67
|
# were not defined in `conf`.
|
50
68
|
#
|
69
|
+
# Be careful to set `:searcher_options`, it must be to specify the
|
70
|
+
# searcher behavior equivalent to `grep` with "-inR". The default
|
71
|
+
# value for the searcher options is defined for BSD grep (default
|
72
|
+
# grep on macOS), GNU grep, and ripgrep (aka rg). They are:
|
73
|
+
#
|
74
|
+
# "grep" => ["-i", "-n", "-R", "-E"]
|
75
|
+
# "egrep" => ["-i", "-n", "-R"]
|
76
|
+
# "ggrep" => ["-i", "-n", "-R", "-E"]
|
77
|
+
# "gegrep" => ["-i", "-n", "-R"]
|
78
|
+
# "rg" => ["-S", "-n", "--no-heading", "--color", "never"]
|
79
|
+
#
|
80
|
+
# If you use one of those searchers, it is not recommended to set
|
81
|
+
# `:searcher_options`. The default value works well in
|
82
|
+
# `textrepo`.
|
83
|
+
#
|
51
84
|
# :call-seq:
|
52
|
-
# new(
|
85
|
+
# new(Hash or Hash like object) -> FileSystemRepository
|
53
86
|
|
54
87
|
def initialize(conf)
|
55
88
|
super
|
@@ -58,6 +91,8 @@ module Textrepo
|
|
58
91
|
@path = File.expand_path("#{name}", base)
|
59
92
|
FileUtils.mkdir_p(@path)
|
60
93
|
@extname = conf[:default_extname] || FAVORITE_EXTNAME
|
94
|
+
@searcher = find_searcher(conf[:searcher])
|
95
|
+
@searcher_options = conf[:searcher_options]
|
61
96
|
end
|
62
97
|
|
63
98
|
##
|
@@ -179,6 +214,27 @@ module Textrepo
|
|
179
214
|
FileTest.exist?(abspath(timestamp))
|
180
215
|
end
|
181
216
|
|
217
|
+
##
|
218
|
+
# Searches a pattern in all text. The given pattern is a word to
|
219
|
+
# search or a regular expression. The pattern would be passed to
|
220
|
+
# a searcher program as is, without modification.
|
221
|
+
#
|
222
|
+
# See the document for Textrepo::Repository#search to know about
|
223
|
+
# the search result.
|
224
|
+
#
|
225
|
+
# :call-seq:
|
226
|
+
# search(String for pattern, String for Timestamp pattern) -> Array
|
227
|
+
|
228
|
+
def search(pattern, stamp_pattern = nil)
  # Without a timestamp pattern the whole repository is searched in one
  # go; otherwise only the entries selected by the pattern are searched.
  raw_lines =
    if stamp_pattern.nil?
      invoke_searcher_at_repo_root(@searcher, pattern)
    else
      invoke_searcher_for_entries(@searcher, pattern, entries(stamp_pattern))
    end

  # Turn the raw searcher output into the result returned to the caller.
  construct_search_result(raw_lines)
end
|
237
|
+
|
182
238
|
# :stopdoc:
|
183
239
|
|
184
240
|
private
|
@@ -219,6 +275,142 @@ module Textrepo
|
|
219
275
|
}.compact
|
220
276
|
end
|
221
277
|
|
278
|
+
##
|
279
|
+
# The upper limit of files to search at one time. The value has
|
280
|
+
# no reason to select. It seems to me that not too much, not too
|
281
|
+
# little to handle in one process to search.
|
282
|
+
|
283
|
+
LIMIT_OF_FILES = 20
|
284
|
+
|
285
|
+
##
|
286
|
+
# When no timestamp pattern was given, invoke the searcher with
|
287
|
+
# the repository root path as its argument and the recursive
|
288
|
+
# searching option. The search could be done in only one process.
|
289
|
+
|
290
|
+
def invoke_searcher_at_repo_root(searcher, pattern)
  # Command line: <searcher> <options...> <pattern> <repository root>
  argv = [searcher, *find_searcher_options(searcher), pattern, @path]
  stdout, status = Open3.capture2(*argv)

  # A failed run (for grep this includes "no match") yields no lines.
  return [] unless status.success?

  # One chomped String per output line; empty output gives [].
  stdout.lines.map(&:chomp)
end
|
297
|
+
|
298
|
+
##
|
299
|
+
# When a timestamp pattern was given, at first, list target files,
|
300
|
+
# then invoke the searcher for those files. Since the number of
|
301
|
+
# target files may be so much, it seems to be dangerous to pass
|
302
|
+
# all of them to a single search process at one time.
|
303
|
+
#
|
304
|
+
# One more thing to mention, the searcher, like `grep`, does not
|
305
|
+
# add the filename at the beginning of the search result line, if
|
306
|
+
# the target is one file. This behavior is not suitable in this
|
307
|
+
# purpose. The code below adds the filename when the target is
|
308
|
+
# one file.
|
309
|
+
|
310
|
+
# Runs the searcher over the files of +entries+ in batches of at most
# LIMIT_OF_FILES files per process. Returns an Array of chomped output
# lines; an unsuccessful run or empty output contributes nothing.
def invoke_searcher_for_entries(searcher, pattern, entries)
  # Nothing to search. Without this guard the searcher would be started
  # with no file operand at all; a recursive searcher may then scan the
  # current working directory instead (GNU grep -R does).
  return [] if entries.empty?

  options = find_searcher_options(searcher)
  entries.each_slice(LIMIT_OF_FILES).flat_map { |stamps|
    files = find_files(stamps)
    out, status = Open3.capture2(searcher, *options, pattern, *files)
    next [] unless status.success? && !out.empty?

    lines = out.lines.map(&:chomp)
    # With a single target, searchers such as grep omit the file name,
    # so put it back to keep the "<path>:<line number>:<text>" shape.
    files.size == 1 ? lines.map { |line| [files[0], line].join(":") } : lines
  }
end
|
343
|
+
|
344
|
+
# Default options for the known searcher programs, keyed by the
# basename of the program.
SEARCHER_OPTS = {
  # case insensitive, print line number, recursive search, work as egrep
  "grep" => ["-i", "-n", "-R", "-E"],
  # case insensitive, print line number, recursive search
  "egrep" => ["-i", "-n", "-R"],
  # case insensitive, print line number, recursive search, work as gegrep
  "ggrep" => ["-i", "-n", "-R", "-E"],
  # case insensitive, print line number, recursive search
  "gegrep" => ["-i", "-n", "-R"],
  # smart case, print line number, no heading, no color
  "rg" => ["-S", "-n", "--no-heading", "--color", "never"],
}.freeze

# Returns the options to pass to +searcher+: the configured
# @searcher_options when set, otherwise the defaults registered under
# the program's basename, otherwise an empty Array.
#
# The fallback must be an Array. Callers splat the result into the
# command line, and splatting "" would pass an empty string as an extra
# argument to the searcher.
def find_searcher_options(searcher)
  @searcher_options || SEARCHER_OPTS[File.basename(searcher)] || []
end
|
360
|
+
|
361
|
+
# Converts each timestamp into the value returned by abspath for it,
# keeping the order of +timestamps+.
def find_files(timestamps)
  timestamps.each_with_object([]) do |stamp, paths|
    paths << abspath(stamp)
  end
end
|
364
|
+
|
365
|
+
##
|
366
|
+
# The argument must be an Array containing the searcher output.
|
367
|
+
# Each item is constructed from 3 parts:
|
368
|
+
# "<pathname>:<integer>:<text>"
|
369
|
+
#
|
370
|
+
# For example, it may look like:
|
371
|
+
#
|
372
|
+
# "/somewhere/2020/11/20201101044300.md:18:foo is foo"
|
373
|
+
#
|
374
|
+
# Or it may contain more ":" in the text part as:
|
375
|
+
#
|
376
|
+
# "/somewhere/2020/11/20201101044500.md:119:apple:orange:grape"
|
377
|
+
#
|
378
|
+
# In the latter case, `split(":")` will split it too much. That is,
|
379
|
+
# the result will be:
|
380
|
+
#
|
381
|
+
# ["/somewhere/2020/11/20201101044500.md", "119", "apple", "orange", "grape"]
|
382
|
+
#
|
383
|
+
# Text part must be joined with ":".
|
384
|
+
|
385
|
+
# Converts each "<pathname>:<integer>:<text>" line of +output+ into
# [Timestamp, Integer, String]. Raises InvalidSearchResultError when a
# line cannot be converted.
def construct_search_result(output)
  output.map { |line| parse_search_line(line) }
end

# Parses one searcher output line into [Timestamp, Integer, String].
def parse_search_line(line)
  # Limit the split to 3 fields so that any ":" inside the matched text,
  # including a trailing one, stays in the text part untouched.
  pathname, num, text = line.split(":", 3)
  [Timestamp.parse_s(timestamp_str(pathname)), num.to_i, text.to_s]
rescue InvalidTimestampStringError, TypeError
  # @searcher_options is nil unless it was configured explicitly, so ask
  # find_searcher_options for the options actually in use; calling join
  # on nil here would hide this error behind a NoMethodError.
  used_options = Array(find_searcher_options(@searcher))
  raise InvalidSearchResultError, [@searcher, *used_options].join(" ")
end
|
397
|
+
|
398
|
+
# Returns the absolute path of the searcher program to use, or nil when
# none is found in PATH. +program+ (when given) is tried first, then
# FAVORITE_SEARCHER as the fallback.
def find_searcher(program = nil)
  candidates = [program, FAVORITE_SEARCHER].compact.uniq
  # PATH may be unset; treat that as an empty search path.
  search_paths = ENV.fetch("PATH", "").split(File::PATH_SEPARATOR)
  candidates.each do |prog|
    found = find_in_paths(prog, search_paths)
    # Take the first candidate that exists, so a missing +program+ falls
    # back to FAVORITE_SEARCHER instead of yielding nil.
    return found if found
  end
  nil
end
|
406
|
+
|
407
|
+
# Looks for +prog+ in each directory of +paths+, in order, and returns
# the absolute path of the first executable regular file found, or nil
# when there is none.
def find_in_paths(prog, paths)
  paths.each do |dir|
    candidate = File.expand_path(prog, dir)
    # Require a regular file: directories also pass File.executable?,
    # but cannot be run as a searcher.
    return candidate if File.file?(candidate) && File.executable?(candidate)
  end
  nil
end
|
222
414
|
# :startdoc:
|
223
415
|
|
224
416
|
end
|
data/lib/textrepo/repository.rb
CHANGED
@@ -92,6 +92,25 @@ module Textrepo
|
|
92
92
|
# exist?(Timestamp) -> true or false
|
93
93
|
|
94
94
|
# Placeholder implementation: always answers false, ignoring +timestamp+.
def exist?(timestamp)
  false
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# Searches a pattern (word or regular expression) in the texts whose
|
98
|
+
# timestamps match a given timestamp pattern. Returns an Array of
|
99
|
+
# search results. If no match, returns an empty Array.
|
100
|
+
#
|
101
|
+
# See the document for Repository#entries about a timestamp
|
102
|
+
# pattern. When nil is passed as a timestamp pattern, searching
|
103
|
+
# applies to all text in the repository.
|
104
|
+
#
|
105
|
+
# Each entry of the result Array is constructed from 3 items, (1)
|
106
|
+
# timestamp (Timestamp), (2) line number (Integer), (3) matched
|
107
|
+
# line (String).
|
108
|
+
#
|
109
|
+
# :call-seq:
|
110
|
+
# search(String for pattern, String for Timestamp pattern) -> Array
|
111
|
+
|
112
|
+
# Placeholder implementation: always returns an empty Array, ignoring
# both arguments.
def search(pattern, stamp_pattern = nil)
  []
end
|
113
|
+
|
95
114
|
end
|
96
115
|
|
97
116
|
require_relative 'file_system_repository'
|
data/lib/textrepo/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: textrepo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.5
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mnbi
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|