textrepo 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24594803cdcc722b7a9ca01648a9d8955cb9637669ab1c459e653c2eb2d2c199
4
- data.tar.gz: bc5802a9a1df190d2278163b25d5ff1d2d107223acec17a3e713d1daf30a2218
3
+ metadata.gz: dc5cf6089b4883c93dc228e19aa0a149d71a8d9cc26a92e6c75fdc4ee0b2d694
4
+ data.tar.gz: 8ebecace02d486b6b6c12256d52adfd44963a3a14f6d39729b83d426bce3a26e
5
5
  SHA512:
6
- metadata.gz: 21ef95e13e30d816b671be203b7beffa1ab022aec94bfc9540a7712f548cbf5749a8f5dc8f13e1f22067f53f52ac63dd2ac75ffa32719d72deb49b6b410c05a3
7
- data.tar.gz: bef5bcfbef5a5557f80c44035dd42a9c8474cb04836e3328a6b0a7f0c065d5b601ff92bfcd4dee4a7a7c3273ac9e0648ae7a1e3bb52acdd95627de9266a04767
6
+ metadata.gz: aef27bf0363a66eeb1676ccda0682c253155875a8d0c16b27189674836edf2563b8df87b7f3b56cdb5d02c895f724f47a956588a5d97d24d83cdddce76fc083e
7
+ data.tar.gz: 81a4f8a76eb0ec8545e29ddbd537d49ec00f6009f5617dad61c61d8a5552c6df090d6d37fd507a7976c5f5f60b7d0c8b43f9a99d98d7efa1e933828270711d87
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
  ## [Unreleased]
8
8
  Nothing to record here.
9
9
 
10
+ ## [0.5.0] - 2020-11-01
11
+ ### Added
12
+ - Add a new API `Repository#search`.
13
+ - Add a new API `Repository#exist?`. (0.4.3)
14
+
10
15
  ## [0.4.0] - 2020-10-14
11
16
  ### Added
12
17
  - Released to rubygems.org.
@@ -2,19 +2,21 @@ module Textrepo
2
2
 
3
3
  ##
4
4
  # Following errors might occur in repository operations:
5
- # +--------------------------+---------------------+
6
- # | operation (args) | error type |
7
- # +--------------------------+---------------------+
8
- # | create (timestamp, text) | Duplicate timestamp |
9
- # | | Empty text |
10
- # +--------------------------+---------------------+
11
- # | read (timestamp) | Missing timestamp |
12
- # +--------------------------+---------------------+
13
- # | update (timestamp, text) | Mssing timestamp |
14
- # | | Empty text |
15
- # +--------------------------+---------------------+
16
- # | delete (timestamp) | Missing timestamp |
17
- # +--------------------------+---------------------+
5
+ # +---------------------------------+-----------------------+
6
+ # | operation (args) | error type |
7
+ # +---------------------------------+-----------------------+
8
+ # | create (timestamp, text) | Duplicate timestamp |
9
+ # | | Empty text |
10
+ # +---------------------------------+-----------------------+
11
+ # | read (timestamp) | Missing timestamp |
12
+ # +---------------------------------+-----------------------+
13
+ # | update (timestamp, text) | Missing timestamp |
14
+ # | | Empty text |
15
+ # +---------------------------------+-----------------------+
16
+ # | delete (timestamp) | Missing timestamp |
17
+ # +---------------------------------+-----------------------+
18
+ # | search (pattern, stamp_pattern) | Invalid search result |
19
+ # +---------------------------------+-----------------------+
18
20
 
19
21
  class Error < StandardError; end
20
22
 
@@ -25,6 +27,7 @@ module Textrepo
25
27
  EMPTY_TEXT = 'empty text'
26
28
  MISSING_TIMESTAMP = 'missing timestamp: %s'
27
29
  INVALID_TIMESTAMP_STRING = "invalid string as timestamp: %s"
30
+ INVALID_SEARCH_RESULT = "invalid result by searcher: %s"
28
31
  end
29
32
  # :startdoc:
30
33
 
@@ -77,4 +80,14 @@ module Textrepo
77
80
  end
78
81
  end
79
82
 
83
+ ##
84
+ # An error raised if the search result is not suitable for use.
85
+ #
86
+
87
+ class InvalidSearchResultError < Error  # signals searcher output that cannot be turned into a search result
88
+ def initialize(str)  # str is substituted into the ErrMsg::INVALID_SEARCH_RESULT format
89
+ super(ErrMsg::INVALID_SEARCH_RESULT % str)
90
+ end
91
+ end
92
+
80
93
  end
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require "open3"
2
3
 
3
4
  module Textrepo
4
5
 
@@ -19,6 +20,16 @@ module Textrepo
19
20
 
20
21
  attr_reader :extname
21
22
 
23
+ ##
24
+ # Searcher program name.
25
+
26
+ attr_reader :searcher
27
+
28
+ ##
29
+ # An array of options to pass to the searcher program.
30
+
31
+ attr_reader :searcher_options
32
+
22
33
  ##
23
34
  # Default name for the repository, which is used when no name is
24
35
  # specified in the configuration settings.
@@ -31,6 +42,11 @@ module Textrepo
31
42
 
32
43
  FAVORITE_EXTNAME = 'md'
33
44
 
45
+ ##
46
+ # Default searcher program to search text in the repository.
47
+
48
+ FAVORITE_SEARCHER = 'grep'
49
+
34
50
  ##
35
51
  # Creates a new repository object. The argument, `conf` must be a
36
52
  # Hash object. It should hold the following values:
@@ -41,15 +57,32 @@ module Textrepo
41
57
  # - OPTIONAL: (if not specified, default values are used)
42
58
  # - :repository_name => basename of the root path for the repository
43
59
  # - :default_extname => extname for a file stored in the repository
60
+ # - :searcher => a program to search like `grep`
61
+ # - :searcher_options => an Array of options to pass to the searcher
44
62
  #
45
63
  # The root path of the repository looks like the following:
46
64
  # - conf[:repository_base]/conf[:repository_name]
47
65
  #
48
- # Default values are set when `repository_name` and `default_extname`
66
+ # Default values are set when `:repository_name` and `:default_extname`
49
67
  # were not defined in `conf`.
50
68
  #
69
+ # Be careful when setting `:searcher_options`: the options must make
70
+ # the searcher behave like `grep` with "-inR". Default option
71
+ # values are defined for BSD grep (the default
72
+ # grep on macOS), GNU grep, and ripgrep (aka rg). They are:
73
+ #
74
+ # "grep" => ["-i", "-n", "-R", "-E"]
75
+ # "egrep" => ["-i", "-n", "-R"]
76
+ # "ggrep" => ["-i", "-n", "-R", "-E"]
77
+ # "gegrep" => ["-i", "-n", "-R"]
78
+ # "rg" => ["-S", "-n", "--no-heading", "--color", "never"]
79
+ #
80
+ # If you use one of those 3 searchers, setting
81
+ # `:searcher_options` is not recommended. The default values work
82
+ # well in `textrepo`.
83
+ #
51
84
  # :call-seq:
52
- # new(Rbnotes::Conf or Hash) -> FileSystemRepository
85
+ # new(Hash or Hash like object) -> FileSystemRepository
53
86
 
54
87
  def initialize(conf)
55
88
  super
@@ -58,6 +91,8 @@ module Textrepo
58
91
  @path = File.expand_path("#{name}", base)
59
92
  FileUtils.mkdir_p(@path)
60
93
  @extname = conf[:default_extname] || FAVORITE_EXTNAME
94
+ @searcher = find_searcher(conf[:searcher])
95
+ @searcher_options = conf[:searcher_options]
61
96
  end
62
97
 
63
98
  ##
@@ -179,6 +214,27 @@ module Textrepo
179
214
  FileTest.exist?(abspath(timestamp))
180
215
  end
181
216
 
217
+ ##
218
+ # Searches for a pattern in all text. The given pattern is a word to
219
+ # search for or a regular expression. The pattern is passed to
220
+ # the searcher program as is.
221
+ #
222
+ # See the document for Textrepo::Repository#search to know about
223
+ # the search result.
224
+ #
225
+ # :call-seq:
226
+ # search(String for pattern, String for Timestamp pattern) -> Array
227
+
228
+ def search(pattern, stamp_pattern = nil)
229
+ result = nil
230
+ if stamp_pattern.nil?  # no timestamp pattern: search from the repository root
231
+ result = invoke_searcher_at_repo_root(@searcher, pattern)
232
+ else  # otherwise search only the entries selected by the timestamp pattern
233
+ result = invoke_searcher_for_entries(@searcher, pattern, entries(stamp_pattern))
234
+ end
235
+ construct_search_result(result)  # raw "<pathname>:<integer>:<text>" lines -> [timestamp, line number, text]
236
+ end
237
+
182
238
  # :stopdoc:
183
239
 
184
240
  private
@@ -219,6 +275,142 @@ module Textrepo
219
275
  }.compact
220
276
  end
221
277
 
278
+ ##
279
+ # The upper limit of files to search at one time. The value was
280
+ # chosen arbitrarily: it seems neither too many nor too few
281
+ # files to handle in one search process.
282
+
283
+ LIMIT_OF_FILES = 20
284
+
285
+ ##
286
+ # When no timestamp pattern was given, invoke the searcher with
287
+ # the repository root path as its argument and the recursive
288
+ # searching option. The search could be done in only one process.
289
+
290
+ def invoke_searcher_at_repo_root(searcher, pattern)
291
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),  # o: captured stdout, s: exit status
292
+ pattern, @path)
293
+ output = []
294
+ output += o.lines.map(&:chomp) if s.success? && (! o.empty?)  # unsuccessful exit or no output => []
295
+ output
296
+ end
297
+
298
+ ##
299
+ # When a timestamp pattern was given, at first, list target files,
300
+ # then invoke the searcher for those files. Since the number of
301
+ # target files may be so much, it seems to be dangerous to pass
302
+ # all of them to a single search process at one time.
303
+ #
304
+ # One more thing to mention, the searcher, like `grep`, does not
305
+ # add the filename at the beginning of the search result line, if
306
+ # the target is one file. This behavior is not suitable in this
307
+ # purpose. The code below adds the filename when the target is
308
+ # one file.
309
+
310
+ def invoke_searcher_for_entries(searcher, pattern, entries)
311
+ output = []
312
+ return output if entries.empty?  # with no file operand a recursive searcher would scan the working directory
313
+ num_of_entries = entries.size
314
+ if num_of_entries == 1
315
+ # If the search target is one file, the output needs special
316
+ # treatment.
317
+ file = abspath(entries[0])
318
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
319
+ pattern, file)
320
+ if s.success? && (! o.empty?)
321
+ output += o.lines.map { |line|
322
+ # add filename at the beginning of the search result line
323
+ [file, line.chomp].join(":")
324
+ }
325
+ end
326
+ elsif num_of_entries > LIMIT_OF_FILES
327
+ output += invoke_searcher_for_entries(searcher, pattern, entries[0..(LIMIT_OF_FILES - 1)])
328
+ output += invoke_searcher_for_entries(searcher, pattern, entries[LIMIT_OF_FILES..-1])
329
+ else
330
+ # When the number of targets does not exceed the upper limit,
331
+ # invoke the searcher with all of the target files as its
332
+ # arguments.
333
+ files = find_files(entries)
334
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
335
+ pattern, *files)
336
+ if s.success? && (! o.empty?)
337
+ output += o.lines.map(&:chomp)
338
+ end
339
+ end
340
+
341
+ output
342
+ end
343
+
344
+ SEARCHER_OPTS = {
345
+ # case insensitive, print line number, recursive search, work as egrep
346
+ "grep" => ["-i", "-n", "-R", "-E"],
347
+ # case insensitive, print line number, recursive search
348
+ "egrep" => ["-i", "-n", "-R"],
349
+ # case insensitive, print line number, recursive search, work as gegrep
350
+ "ggrep" => ["-i", "-n", "-R", "-E"],
351
+ # case insensitive, print line number, recursive search
352
+ "gegrep" => ["-i", "-n", "-R"],
353
+ # smart case, print line number, no color
354
+ "rg" => ["-S", "-n", "--no-heading", "--color", "never"],
355
+ }
356
+ # Options for a searcher: the configured ones, else the table entry for its basename, else none.
357
+ def find_searcher_options(searcher)
358
+ @searcher_options || SEARCHER_OPTS[File.basename(searcher)] || []  # must be an Array: callers splat it into argv
359
+ end
360
+
361
+ def find_files(timestamps)  # converts each timestamp with abspath, keeping the order
362
+ timestamps.map{|stamp| abspath(stamp)}
363
+ end
364
+
365
+ ##
366
+ # The argument must be an Array containing the searcher output.
367
+ # Each item is constructed from 3 parts:
368
+ # "<pathname>:<integer>:<text>"
369
+ #
370
+ # For example, it may look like:
371
+ #
372
+ # "/somewhere/2020/11/20201101044300.md:18:foo is foo"
373
+ #
374
+ # Or it may contain more ":" in the text part as:
375
+ #
376
+ # "/somewhere/2020/11/20201101044500.md:119:apple:orange:grape"
377
+ #
378
+ # In the latter case, `split(":")` will split it too much. That is,
379
+ # the result will be:
380
+ #
381
+ # ["/somewhere/2020/11/20201101044500.md", "119", "apple", "orange", "grape"]
382
+ #
383
+ # Text part must be joined with ":".
384
+
385
+ def construct_search_result(output)
386
+ output.map { |line|
387
+ begin
388
+ pathname, num, match_text = line.split(":", 3)  # limit 3 keeps every ":" (even trailing ones) in the text part
389
+ [Timestamp.parse_s(timestamp_str(pathname)),
390
+ num.to_i,
391
+ match_text.to_s]  # nil when the line has no text part
392
+ rescue InvalidTimestampStringError, TypeError => _
393
+ raise InvalidSearchResultError, [@searcher, *find_searcher_options(@searcher)].join(" ")  # options may be unset, so resolve them
394
+ end
395
+ }.compact
396
+ end
397
+
398
+ def find_searcher(program = nil)  # returns the path of the first usable candidate, or nil if none is found
399
+ candidates = [FAVORITE_SEARCHER]
400
+ candidates.unshift(program) unless program.nil? || candidates.include?(program)
401
+ search_paths = ENV.fetch("PATH", "").split(File::PATH_SEPARATOR)  # tolerate an unset PATH
402
+ candidates.map { |prog|
403
+ find_in_paths(prog, search_paths)
404
+ }.compact.first  # skip candidates that were not found so the default searcher can take over
405
+ end
406
+
407
+ def find_in_paths(prog, paths)  # returns the first executable regular file named prog under paths, else nil
408
+ paths.each { |dir|
409
+ candidate = File.expand_path(prog, dir)
410
+ return candidate if FileTest.file?(candidate) && FileTest.executable?(candidate)  # file? rejects directories
411
+ }
412
+ nil
413
+ end
222
414
  # :startdoc:
223
415
 
224
416
  end
@@ -92,6 +92,25 @@ module Textrepo
92
92
  # exist?(Timestamp) -> true or false
93
93
 
94
94
  def exist?(timestamp); false; end  # placeholder body: always answers false
95
+
96
+ ##
97
+ # Searches for a pattern (word or regular expression) in the texts
98
+ # that match a given timestamp pattern. Returns an Array of
99
+ # search results. If no match, returns an empty Array.
100
+ #
101
+ # See the document for Repository#entries about a timestamp
102
+ # pattern. When nil is passed as a timestamp pattern, searching
103
+ # applies to all text in the repository.
104
+ #
105
+ # Each entry of the result Array is constructed from 3 items, (1)
106
+ # timestamp (Timestamp), (2) line number (Integer), (3) matched
107
+ # line (String).
108
+ #
109
+ # :call-seq:
110
+ # search(String for pattern, String for Timestamp pattern) -> Array
111
+
112
+ def search(pattern, stamp_pattern = nil); []; end  # placeholder body: always returns an empty Array
113
+
95
114
  end
96
115
 
97
116
  require_relative 'file_system_repository'
@@ -1,3 +1,3 @@
1
1
  module Textrepo
2
- VERSION = '0.4.5'
2
+ VERSION = '0.5.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textrepo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mnbi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-30 00:00:00.000000000 Z
11
+ date: 2020-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler