textrepo 0.4.5 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24594803cdcc722b7a9ca01648a9d8955cb9637669ab1c459e653c2eb2d2c199
4
- data.tar.gz: bc5802a9a1df190d2278163b25d5ff1d2d107223acec17a3e713d1daf30a2218
3
+ metadata.gz: dc5cf6089b4883c93dc228e19aa0a149d71a8d9cc26a92e6c75fdc4ee0b2d694
4
+ data.tar.gz: 8ebecace02d486b6b6c12256d52adfd44963a3a14f6d39729b83d426bce3a26e
5
5
  SHA512:
6
- metadata.gz: 21ef95e13e30d816b671be203b7beffa1ab022aec94bfc9540a7712f548cbf5749a8f5dc8f13e1f22067f53f52ac63dd2ac75ffa32719d72deb49b6b410c05a3
7
- data.tar.gz: bef5bcfbef5a5557f80c44035dd42a9c8474cb04836e3328a6b0a7f0c065d5b601ff92bfcd4dee4a7a7c3273ac9e0648ae7a1e3bb52acdd95627de9266a04767
6
+ metadata.gz: aef27bf0363a66eeb1676ccda0682c253155875a8d0c16b27189674836edf2563b8df87b7f3b56cdb5d02c895f724f47a956588a5d97d24d83cdddce76fc083e
7
+ data.tar.gz: 81a4f8a76eb0ec8545e29ddbd537d49ec00f6009f5617dad61c61d8a5552c6df090d6d37fd507a7976c5f5f60b7d0c8b43f9a99d98d7efa1e933828270711d87
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
  ## [Unreleased]
8
8
  Nothing to record here.
9
9
 
10
+ ## [0.5.0] - 2020-11-01
11
+ ### Added
12
+ - Add a new API `Repository#search`.
13
+ - Add a new API `Repository#exist?`. (0.4.3)
14
+
10
15
  ## [0.4.0] - 2020-10-14
11
16
  ### Added
12
17
  - Released to rubygems.org.
@@ -2,19 +2,21 @@ module Textrepo
2
2
 
3
3
  ##
4
4
  # Following errors might occur in repository operations:
5
- # +--------------------------+---------------------+
6
- # | operation (args) | error type |
7
- # +--------------------------+---------------------+
8
- # | create (timestamp, text) | Duplicate timestamp |
9
- # | | Empty text |
10
- # +--------------------------+---------------------+
11
- # | read (timestamp) | Missing timestamp |
12
- # +--------------------------+---------------------+
13
- # | update (timestamp, text) | Mssing timestamp |
14
- # | | Empty text |
15
- # +--------------------------+---------------------+
16
- # | delete (timestamp) | Missing timestamp |
17
- # +--------------------------+---------------------+
5
+ # +---------------------------------+-----------------------+
6
+ # | operation (args) | error type |
7
+ # +---------------------------------+-----------------------+
8
+ # | create (timestamp, text) | Duplicate timestamp |
9
+ # | | Empty text |
10
+ # +---------------------------------+-----------------------+
11
+ # | read (timestamp) | Missing timestamp |
12
+ # +---------------------------------+-----------------------+
13
+ # | update (timestamp, text) | Missing timestamp |
14
+ # | | Empty text |
15
+ # +---------------------------------+-----------------------+
16
+ # | delete (timestamp) | Missing timestamp |
17
+ # +---------------------------------+-----------------------+
18
+ # | search (pattern, stamp_pattern) | Invalid search result |
19
+ # +---------------------------------+-----------------------+
18
20
 
19
21
  class Error < StandardError; end
20
22
 
@@ -25,6 +27,7 @@ module Textrepo
25
27
  EMPTY_TEXT = 'empty text'
26
28
  MISSING_TIMESTAMP = 'missing timestamp: %s'
27
29
  INVALID_TIMESTAMP_STRING = "invalid string as timestamp: %s"
30
+ INVALID_SEARCH_RESULT = "invalid result by searcher: %s"
28
31
  end
29
32
  # :startdoc:
30
33
 
@@ -77,4 +80,14 @@ module Textrepo
77
80
  end
78
81
  end
79
82
 
83
+ ##
84
+ # An error raised if the search result is not suitable to use.
85
+ #
86
+
87
+ class InvalidSearchResultError < Error
88
+ def initialize(str)
89
+ super(ErrMsg::INVALID_SEARCH_RESULT % str)
90
+ end
91
+ end
92
+
80
93
  end
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require "open3"
2
3
 
3
4
  module Textrepo
4
5
 
@@ -19,6 +20,16 @@ module Textrepo
19
20
 
20
21
  attr_reader :extname
21
22
 
23
+ ##
24
+ # Searcher program name.
25
+
26
+ attr_reader :searcher
27
+
28
+ ##
29
+ # An array of options to pass to the searcher program.
30
+
31
+ attr_reader :searcher_options
32
+
22
33
  ##
23
34
  # Default name for the repository which is used when no name is
24
35
  # specified in the configuration settings.
@@ -31,6 +42,11 @@ module Textrepo
31
42
 
32
43
  FAVORITE_EXTNAME = 'md'
33
44
 
45
+ ##
46
+ # Default searcher program to search text in the repository.
47
+
48
+ FAVORITE_SEARCHER = 'grep'
49
+
34
50
  ##
35
51
  # Creates a new repository object. The argument, `conf` must be a
36
52
  # Hash object. It should hold the following values:
@@ -41,15 +57,32 @@ module Textrepo
41
57
  # - OPTIONAL: (if not specified, default values are used)
42
58
  # - :repository_name => basename of the root path for the repository
43
59
  # - :default_extname => extname for a file stored in the repository
60
+ # - :searcher => a program to search like `grep`
61
+ # - :searcher_options => an Array of options to pass to the searcher
44
62
  #
45
63
  # The root path of the repository looks like the following:
46
64
  # - conf[:repository_base]/conf[:repository_name]
47
65
  #
48
- # Default values are set when `repository_name` and `default_extname`
66
+ # Default values are set when `:repository_name` and `:default_extname`
49
67
  # were not defined in `conf`.
50
68
  #
69
+ # Be careful when setting `:searcher_options`: it must make the
70
+ # searcher behave equivalently to `grep` with "-inR". The default
71
+ # value for the searcher options is defined for BSD grep (default
72
+ # grep on macOS), GNU grep, and ripgrep (aka rg). They are:
73
+ #
74
+ # "grep" => ["-i", "-n", "-R", "-E"]
75
+ # "egrep" => ["-i", "-n", "-R"]
76
+ # "ggrep" => ["-i", "-n", "-R", "-E"]
77
+ # "gegrep" => ["-i", "-n", "-R"]
78
+ # "rg" => ["-S", "-n", "--no-heading", "--color", "never"]
79
+ #
80
+ # If you use one of those 3 searchers, it is not recommended to set
81
+ # `:searcher_options`. The default value works well in
82
+ # `textrepo`.
83
+ #
51
84
  # :call-seq:
52
- # new(Rbnotes::Conf or Hash) -> FileSystemRepository
85
+ # new(Hash or Hash like object) -> FileSystemRepository
53
86
 
54
87
  def initialize(conf)
55
88
  super
@@ -58,6 +91,8 @@ module Textrepo
58
91
  @path = File.expand_path("#{name}", base)
59
92
  FileUtils.mkdir_p(@path)
60
93
  @extname = conf[:default_extname] || FAVORITE_EXTNAME
94
+ @searcher = find_searcher(conf[:searcher])
95
+ @searcher_options = conf[:searcher_options]
61
96
  end
62
97
 
63
98
  ##
@@ -179,6 +214,27 @@ module Textrepo
179
214
  FileTest.exist?(abspath(timestamp))
180
215
  end
181
216
 
217
+ ##
218
+ # Searches a pattern in all text. The given pattern is a word to
219
+ # search or a regular expression. The pattern is passed to the
220
+ # searcher program as is.
221
+ #
222
+ # See the document for Textrepo::Repository#search to know about
223
+ # the search result.
224
+ #
225
+ # :call-seq:
226
+ # search(String for pattern, String for Timestamp pattern) -> Array
227
+
228
+ def search(pattern, stamp_pattern = nil)
229
+ result = nil
230
+ if stamp_pattern.nil?
231
+ result = invoke_searcher_at_repo_root(@searcher, pattern)
232
+ else
233
+ result = invoke_searcher_for_entries(@searcher, pattern, entries(stamp_pattern))
234
+ end
235
+ construct_search_result(result)
236
+ end
237
+
182
238
  # :stopdoc:
183
239
 
184
240
  private
@@ -219,6 +275,142 @@ module Textrepo
219
275
  }.compact
220
276
  end
221
277
 
278
+ ##
279
+ # The upper limit of files to search at one time. The value was
280
+ # chosen without a particular reason; it just seems to be neither
281
+ # too many nor too few to handle in one search process.
282
+
283
+ LIMIT_OF_FILES = 20
284
+
285
+ ##
286
+ # When no timestamp pattern was given, invoke the searcher with
287
+ # the repository root path as its argument and the recursive
288
+ # searching option. The search could be done in only one process.
289
+
290
+ def invoke_searcher_at_repo_root(searcher, pattern)
291
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
292
+ pattern, @path)
293
+ output = []
294
+ output += o.lines.map(&:chomp) if s.success? && (! o.empty?)
295
+ output
296
+ end
297
+
298
+ ##
299
+ # When a timestamp pattern was given, at first, list target files,
300
+ # then invoke the searcher for those files. Since the number of
301
+ # target files may be very large, it seems to be dangerous to pass
302
+ # all of them to a single search process at one time.
303
+ #
304
+ # One more thing to mention, the searcher, like `grep`, does not
305
+ # add the filename at the beginning of the search result line, if
306
+ # the target is one file. This behavior is not suitable in this
307
+ # purpose. The code below adds the filename when the target is
308
+ # one file.
309
+
310
+ def invoke_searcher_for_entries(searcher, pattern, entries)
311
+ output = []
312
+
313
+ num_of_entries = entries.size
314
+ if num_of_entries == 1
315
+ # If the search target is one file, the output needs special
316
+ # treatment.
317
+ file = abspath(entries[0])
318
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
319
+ pattern, file)
320
+ if s.success? && (! o.empty)
321
+ output += o.lines.map { |line|
322
+ # add filename at the beginning of the search result line
323
+ [file, line.chomp].join(":")
324
+ }
325
+ end
326
+ elsif num_of_entries > LIMIT_OF_FILES
327
+ output += invoke_searcher_for_entries(searcher, pattern, entries[0..(LIMIT_OF_FILES - 1)])
328
+ output += invoke_searcher_for_entries(searcher, pattern, entries[LIMIT_OF_FILES..-1])
329
+ else
330
+ # When the number of target is less than the upper limit,
331
+ # invoke the searcher with all of target files as its
332
+ # arguments.
333
+ files = find_files(entries)
334
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
335
+ pattern, *files)
336
+ if s.success? && (! o.empty)
337
+ output += o.lines.map(&:chomp)
338
+ end
339
+ end
340
+
341
+ output
342
+ end
343
+
344
+ SEARCHER_OPTS = {
345
+ # case insensitive, print line number, recursive search, work as egrep
346
+ "grep" => ["-i", "-n", "-R", "-E"],
347
+ # case insensitive, print line number, recursive search
348
+ "egrep" => ["-i", "-n", "-R"],
349
+ # case insensitive, print line number, recursive search, work as gegrep
350
+ "ggrep" => ["-i", "-n", "-R", "-E"],
351
+ # case insensitive, print line number, recursive search
352
+ "gegrep" => ["-i", "-n", "-R"],
353
+ # smart case, print line number, no color
354
+ "rg" => ["-S", "-n", "--no-heading", "--color", "never"],
355
+ }
356
+
357
+ def find_searcher_options(searcher)
358
+ @searcher_options || SEARCHER_OPTS[File.basename(searcher)] || ""
359
+ end
360
+
361
+ def find_files(timestamps)
362
+ timestamps.map{|stamp| abspath(stamp)}
363
+ end
364
+
365
+ ##
366
+ # The argument must be an Array containing the searcher output.
367
+ # Each item is constructed from 3 parts:
368
+ # "<pathname>:<integer>:<text>"
369
+ #
370
+ # For example, it may look like:
371
+ #
372
+ # "/somewhere/2020/11/20201101044300.md:18:foo is foo"
373
+ #
374
+ # Or it may contain more ":" in the text part as:
375
+ #
376
+ # "/somewhere/2020/11/20201101044500.md:119:apple:orange:grape"
377
+ #
378
+ # In the latter case, `split(":")` will split it too much. That is,
379
+ # the result will be:
380
+ #
381
+ # ["/somewhere/2020/11/20201101044500.md", "119", "apple", "orange", "grape"]
382
+ #
383
+ # Text part must be joined with ":".
384
+
385
+ def construct_search_result(output)
386
+ output.map { |line|
387
+ begin
388
+ pathname, num, *match_text = line.split(":")
389
+ [Timestamp.parse_s(timestamp_str(pathname)),
390
+ num.to_i,
391
+ match_text.join(":")]
392
+ rescue InvalidTimestampStringError, TypeError => _
393
+ raise InvalidSearchResultError, [@searcher, @searcher_options.join(" ")].join(" ")
394
+ end
395
+ }.compact
396
+ end
397
+
398
+ def find_searcher(program = nil)
399
+ candidates = [FAVORITE_SEARCHER]
400
+ candidates.unshift(program) unless program.nil? || candidates.include?(program)
401
+ search_paths = ENV["PATH"].split(":")
402
+ candidates.map { |prog|
403
+ find_in_paths(prog, search_paths)
404
+ }[0]
405
+ end
406
+
407
+ def find_in_paths(prog, paths)
408
+ paths.each { |p|
409
+ abspath = File.expand_path(prog, p)
410
+ return abspath if FileTest.exist?(abspath) && FileTest.executable?(abspath)
411
+ }
412
+ nil
413
+ end
222
414
  # :startdoc:
223
415
 
224
416
  end
@@ -92,6 +92,25 @@ module Textrepo
92
92
  # exist?(Timestamp) -> true or false
93
93
 
94
94
  def exist?(timestamp); false; end
95
+
96
+ ##
97
+ # Searches a pattern (word or regular expression) in the texts that
98
+ # match a given timestamp pattern. Returns an Array of
99
+ # search results. If no match, returns an empty Array.
100
+ #
101
+ # See the document for Repository#entries about a timestamp
102
+ # pattern. When nil is passed as a timestamp pattern, searching
103
+ # applies to all text in the repository.
104
+ #
105
+ # Each entry of the result Array is constructed from 3 items, (1)
106
+ # timestamp (Timestamp), (2) line number (Integer), (3) matched
107
+ # line (String).
108
+ #
109
+ # :call-seq:
110
+ # search(String for pattern, String for Timestamp pattern) -> Array
111
+
112
+ def search(pattern, stamp_pattern = nil); []; end
113
+
95
114
  end
96
115
 
97
116
  require_relative 'file_system_repository'
@@ -1,3 +1,3 @@
1
1
  module Textrepo
2
- VERSION = '0.4.5'
2
+ VERSION = '0.5.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textrepo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mnbi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-30 00:00:00.000000000 Z
11
+ date: 2020-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler