textrepo 0.4.5 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24594803cdcc722b7a9ca01648a9d8955cb9637669ab1c459e653c2eb2d2c199
4
- data.tar.gz: bc5802a9a1df190d2278163b25d5ff1d2d107223acec17a3e713d1daf30a2218
3
+ metadata.gz: fa38f5fbd3d1fd393eeb4ac200d1c051d8ff082cd92beaf0ab1b3756efd79e92
4
+ data.tar.gz: 70107459347c9685f722a4aa2367c1f04ec3257c12c489a1436d242e2f322f4f
5
5
  SHA512:
6
- metadata.gz: 21ef95e13e30d816b671be203b7beffa1ab022aec94bfc9540a7712f548cbf5749a8f5dc8f13e1f22067f53f52ac63dd2ac75ffa32719d72deb49b6b410c05a3
7
- data.tar.gz: bef5bcfbef5a5557f80c44035dd42a9c8474cb04836e3328a6b0a7f0c065d5b601ff92bfcd4dee4a7a7c3273ac9e0648ae7a1e3bb52acdd95627de9266a04767
6
+ metadata.gz: 19e12a6e1ac352a005aca887e887089a8daa8ae8fa0573e3d640803f19950747addd52ed77ddb9da9534f7b9596cc125917c9892ea2a14a031aaae33df753e01
7
+ data.tar.gz: 3c304dbd6330719398ee377183ac26f29ffdfa89b279ff12fac795e9ad664e7b3b249c8f79d36e3bae215e7e114064dd01d3bb23b6652cd2147c8c09382d01ff
@@ -7,6 +7,35 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
  ## [Unreleased]
8
8
  Nothing to record here.
9
9
 
10
+ ## [0.5.4] - 2020-11-05
11
+ ### Added
12
+ - Add a feature for `Repository#update` to keep timestamp unchanged
13
+ - add the third argument as:
14
+ - `Repository#update(timestamp, text, keep_stamp = false)`
15
+
16
+ ## [0.5.3] - 2020-11-03
17
+ ### Changed
18
+ - Fix issue #38: fix typo in code for FileSystemRepository.
19
+
20
+ ## [0.5.2] - 2020-11-03
21
+ ### Changed
22
+ - Fix issue #34:
23
+ - fix FileSystemRepository#entries to accept "yyyymo" pattern as a
24
+ Timestamp pattern.
25
+ - Fix issue #33: fix typo in the doc for FileSystemRepository.new.
26
+ - Fix issue #31: unfriendly error message of Timestamp.parse_s.
27
+
28
+ ## [0.5.1] - 2020-11-02
29
+ ### Changed
30
+ - Fix issue #28.
31
+ - Modify `Repository#update` to do nothing when the given text is
32
+ identical to the one in the repository.
33
+
34
+ ## [0.5.0] - 2020-11-01
35
+ ### Added
36
+ - Add a new API `Repository#search`.
37
+ - Add a new API `Repository#exist?`. (0.4.3)
38
+
10
39
  ## [0.4.0] - 2020-10-14
11
40
  ### Added
12
41
  - Released to rubygems.org.
@@ -2,19 +2,21 @@ module Textrepo
2
2
 
3
3
  ##
4
4
  # Following errors might occur in repository operations:
5
- # +--------------------------+---------------------+
6
- # | operation (args) | error type |
7
- # +--------------------------+---------------------+
8
- # | create (timestamp, text) | Duplicate timestamp |
9
- # | | Empty text |
10
- # +--------------------------+---------------------+
11
- # | read (timestamp) | Missing timestamp |
12
- # +--------------------------+---------------------+
13
- # | update (timestamp, text) | Mssing timestamp |
14
- # | | Empty text |
15
- # +--------------------------+---------------------+
16
- # | delete (timestamp) | Missing timestamp |
17
- # +--------------------------+---------------------+
5
+ # +---------------------------------+-----------------------+
6
+ # | operation (args) | error type |
7
+ # +---------------------------------+-----------------------+
8
+ # | create (timestamp, text) | Duplicate timestamp |
9
+ # | | Empty text |
10
+ # +---------------------------------+-----------------------+
11
+ # | read (timestamp) | Missing timestamp |
12
+ # +---------------------------------+-----------------------+
13
+ # | update (timestamp, text) | Missing timestamp |
14
+ # | | Empty text |
15
+ # +---------------------------------+-----------------------+
16
+ # | delete (timestamp) | Missing timestamp |
17
+ # +---------------------------------+-----------------------+
18
+ # | search (pattern, stamp_pattern) | Invalid search result |
19
+ # +---------------------------------+-----------------------+
18
20
 
19
21
  class Error < StandardError; end
20
22
 
@@ -25,6 +27,7 @@ module Textrepo
25
27
  EMPTY_TEXT = 'empty text'
26
28
  MISSING_TIMESTAMP = 'missing timestamp: %s'
27
29
  INVALID_TIMESTAMP_STRING = "invalid string as timestamp: %s"
30
+ INVALID_SEARCH_RESULT = "invalid result by searcher: %s"
28
31
  end
29
32
  # :startdoc:
30
33
 
@@ -77,4 +80,14 @@ module Textrepo
77
80
  end
78
81
  end
79
82
 
83
+ ##
84
+ # An error raised if the search result is not suitable for use.
85
+ #
86
+
87
+ class InvalidSearchResultError < Error
88
+ def initialize(str)
89
+ super(ErrMsg::INVALID_SEARCH_RESULT % str)
90
+ end
91
+ end
92
+
80
93
  end
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require "open3"
2
3
 
3
4
  module Textrepo
4
5
 
@@ -19,6 +20,16 @@ module Textrepo
19
20
 
20
21
  attr_reader :extname
21
22
 
23
+ ##
24
+ # Searcher program name.
25
+
26
+ attr_reader :searcher
27
+
28
+ ##
29
+ # An array of options to pass to the searcher program.
30
+
31
+ attr_reader :searcher_options
32
+
22
33
  ##
23
34
  # Default name for the repository which is used when no name is
24
35
  # specified in the configuration settings.
@@ -31,6 +42,11 @@ module Textrepo
31
42
 
32
43
  FAVORITE_EXTNAME = 'md'
33
44
 
45
+ ##
46
+ # Default searcher program to search text in the repository.
47
+
48
+ FAVORITE_SEARCHER = 'grep'
49
+
34
50
  ##
35
51
  # Creates a new repository object. The argument, `conf` must be a
36
52
  # Hash object. It should hold the following values:
@@ -41,15 +57,33 @@ module Textrepo
41
57
  # - OPTIONAL: (if not specified, default values are used)
42
58
  # - :repository_name => basename of the root path for the repository
43
59
  # - :default_extname => extname for a file stored in the repository
60
+ # - :searcher => a program to search like `grep`
61
+ # - :searcher_options => an Array of options to pass to the searcher
44
62
  #
45
63
  # The root path of the repository looks like the following:
46
64
  # - conf[:repository_base]/conf[:repository_name]
47
65
  #
48
- # Default values are set when `repository_name` and `default_extname`
66
+ # Default values are set when `:repository_name` and `:default_extname`
49
67
  # were not defined in `conf`.
50
68
  #
69
+ # Be careful when setting `:searcher_options`: the options must make
70
+ # the searcher behave like `grep` with "-inRE". The
71
+ # default values for the searcher options are defined for BSD grep
72
+ # (default grep on macOS), GNU grep, and ripgrep (aka rg). They
73
+ # are:
74
+ #
75
+ # "grep" => ["-i", "-n", "-R", "-E"]
76
+ # "egrep" => ["-i", "-n", "-R"]
77
+ # "ggrep" => ["-i", "-n", "-R", "-E"]
78
+ # "gegrep" => ["-i", "-n", "-R"]
79
+ # "rg" => ["-S", "-n", "--no-heading", "--color", "never"]
80
+ #
81
+ # If you use one of those searchers, it is not recommended to set
82
+ # `:searcher_options`. The default values work well in
83
+ # `textrepo`.
84
+ #
51
85
  # :call-seq:
52
- # new(Rbnotes::Conf or Hash) -> FileSystemRepository
86
+ # new(Hash or Hash like object) -> FileSystemRepository
53
87
 
54
88
  def initialize(conf)
55
89
  super
@@ -58,6 +92,8 @@ module Textrepo
58
92
  @path = File.expand_path("#{name}", base)
59
93
  FileUtils.mkdir_p(@path)
60
94
  @extname = conf[:default_extname] || FAVORITE_EXTNAME
95
+ @searcher = find_searcher(conf[:searcher])
96
+ @searcher_options = conf[:searcher_options]
61
97
  end
62
98
 
63
99
  ##
@@ -94,26 +130,32 @@ module Textrepo
94
130
  end
95
131
 
96
132
  ##
97
- # Updates the file content in the repository. A new timestamp
98
- # will be attached to the text.
133
+ # Updates the file content in the repository. A new Timestamp
134
+ # object will be attached to the text. Then, returns the new
135
+ # Timestamp object.
136
+ #
137
+ # When true is passed as the third argument, keeps the Timestamp
138
+ # unchanged, though updates the content. Then, returns the given
139
+ # Timestamp object.
140
+ #
141
+ # See the documentation of Repository#update to know about errors
142
+ # and constraints of this method.
99
143
  #
100
144
  # :call-seq:
101
- # update(Timestamp, Array) -> Timestamp
145
+ # update(Timestamp, Array, true or false) -> Timestamp
102
146
 
103
- def update(timestamp, text)
147
+ def update(timestamp, text, keep_stamp = false)
104
148
  raise EmptyTextError if text.empty?
105
- org_abs = abspath(timestamp)
106
- raise MissingTimestampError, timestamp unless FileTest.exist?(org_abs)
149
+ raise MissingTimestampError, timestamp unless exist?(timestamp)
107
150
 
108
- # the text must be stored with the new timestamp
109
- new_stamp = Timestamp.new(Time.now)
110
- new_abs = abspath(new_stamp)
111
- write_text(new_abs, text)
151
+ # does nothing if the given text is the same as the one in the repository
152
+ return timestamp if read(timestamp) == text
112
153
 
113
- # delete the original file in the repository
114
- FileUtils.remove_file(org_abs)
154
+ stamp = keep_stamp ? timestamp : Timestamp.new(Time.now)
155
+ write_text(abspath(stamp), text)
156
+ FileUtils.remove_file(abspath(timestamp)) unless keep_stamp
115
157
 
116
- new_stamp
158
+ stamp
117
159
  end
118
160
 
119
161
  ##
@@ -147,7 +189,7 @@ module Textrepo
147
189
  if exist?(stamp)
148
190
  results << stamp
149
191
  end
150
- when 0, "yyyymoddhhmiss".size, "yyyymodd".size
192
+ when 0, "yyyymoddhhmiss".size, "yyyymodd".size, "yyyymo".size
151
193
  results += find_entries(stamp_pattern)
152
194
  when 4 # "yyyy" or "modd"
153
195
  pat = nil
@@ -179,6 +221,27 @@ module Textrepo
179
221
  FileTest.exist?(abspath(timestamp))
180
222
  end
181
223
 
224
+ ##
225
+ # Searches a pattern in all text. The given pattern is a word to
226
+ # search or a regular expression. The pattern is passed to
227
+ # the searcher program as is.
228
+ #
229
+ # See the document for Textrepo::Repository#search to know about
230
+ # the search result.
231
+ #
232
+ # :call-seq:
233
+ # search(String for pattern, String for Timestamp pattern) -> Array
234
+
235
+ def search(pattern, stamp_pattern = nil)
236
+ result = nil
237
+ if stamp_pattern.nil?
238
+ result = invoke_searcher_at_repo_root(@searcher, pattern)
239
+ else
240
+ result = invoke_searcher_for_entries(@searcher, pattern, entries(stamp_pattern))
241
+ end
242
+ construct_search_result(result)
243
+ end
244
+
182
245
  # :stopdoc:
183
246
 
184
247
  private
@@ -219,6 +282,142 @@ module Textrepo
219
282
  }.compact
220
283
  end
221
284
 
285
+ ##
286
+ # The upper limit of files to search at one time. The value was
287
+ # chosen arbitrarily: it seems neither too many nor too
288
+ # few files to handle in one search process.
289
+
290
+ LIMIT_OF_FILES = 20
291
+
292
+ ##
293
+ # When no timestamp pattern was given, invoke the searcher with
294
+ # the repository root path as its argument and the recursive
295
+ # searching option. The search could be done in only one process.
296
+
297
+ def invoke_searcher_at_repo_root(searcher, pattern)
298
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
299
+ pattern, @path)
300
+ output = []
301
+ output += o.lines.map(&:chomp) if s.success? && (! o.empty?)
302
+ output
303
+ end
304
+
305
+ ##
306
+ # When a timestamp pattern was given, at first, list target files,
307
+ # then invoke the searcher for those files. Since the number of
308
+ # target files may be so much, it seems to be dangerous to pass
309
+ # all of them to a single search process at one time.
310
+ #
311
+ # One more thing to mention, the searcher, like `grep`, does not
312
+ # add the filename at the beginning of the search result line, if
313
+ # the target is one file. This behavior is not suitable in this
314
+ # purpose. The code below adds the filename when the target is
315
+ # one file.
316
+
317
+ def invoke_searcher_for_entries(searcher, pattern, entries)
318
+ output = []
319
+
320
+ num_of_entries = entries.size
321
+ if num_of_entries == 1
322
+ # If the search target is one file, the output needs special
323
+ # treatment.
324
+ file = abspath(entries[0])
325
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
326
+ pattern, file)
327
+ if s.success? && (! o.empty?)
328
+ output += o.lines.map { |line|
329
+ # add filename at the beginning of the search result line
330
+ [file, line.chomp].join(":")
331
+ }
332
+ end
333
+ elsif num_of_entries > LIMIT_OF_FILES
334
+ output += invoke_searcher_for_entries(searcher, pattern, entries[0..(LIMIT_OF_FILES - 1)])
335
+ output += invoke_searcher_for_entries(searcher, pattern, entries[LIMIT_OF_FILES..-1])
336
+ else
337
+ # When the number of targets does not exceed the upper limit,
338
+ # invoke the searcher with all of target files as its
339
+ # arguments.
340
+ files = find_files(entries)
341
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
342
+ pattern, *files)
343
+ if s.success? && (! o.empty?)
344
+ output += o.lines.map(&:chomp)
345
+ end
346
+ end
347
+
348
+ output
349
+ end
350
+
351
+ SEARCHER_OPTS = {
352
+ # case insensitive, print line number, recursive search, work as egrep
353
+ "grep" => ["-i", "-n", "-R", "-E"],
354
+ # case insensitive, print line number, recursive search
355
+ "egrep" => ["-i", "-n", "-R"],
356
+ # case insensitive, print line number, recursive search, work as gegrep
357
+ "ggrep" => ["-i", "-n", "-R", "-E"],
358
+ # case insensitive, print line number, recursive search
359
+ "gegrep" => ["-i", "-n", "-R"],
360
+ # smart case, print line number, no color
361
+ "rg" => ["-S", "-n", "--no-heading", "--color", "never"],
362
+ }
363
+
364
+ def find_searcher_options(searcher)
365
+ @searcher_options || SEARCHER_OPTS[File.basename(searcher)] || ""
366
+ end
367
+
368
+ def find_files(timestamps)
369
+ timestamps.map{|stamp| abspath(stamp)}
370
+ end
371
+
372
+ ##
373
+ # The argument must be an Array containing the searcher output.
374
+ # Each item is constructed from 3 parts:
375
+ # "<pathname>:<integer>:<text>"
376
+ #
377
+ # For example, it may look like:
378
+ #
379
+ # "/somewhere/2020/11/20201101044300.md:18:foo is foo"
380
+ #
381
+ # Or it may contain more ":" in the text part as:
382
+ #
383
+ # "/somewhere/2020/11/20201101044500.md:119:apple:orange:grape"
384
+ #
385
+ # In the latter case, `split(":")` will split it too much. That is,
386
+ # the result will be:
387
+ #
388
+ # ["/somewhere/2020/11/20201101044500.md", "119", "apple", "orange", "grape"]
389
+ #
390
+ # Text part must be joined with ":".
391
+
392
+ def construct_search_result(output)
393
+ output.map { |line|
394
+ begin
395
+ pathname, num, *match_text = line.split(":")
396
+ [Timestamp.parse_s(timestamp_str(pathname)),
397
+ num.to_i,
398
+ match_text.join(":")]
399
+ rescue InvalidTimestampStringError, TypeError => _
400
+ raise InvalidSearchResultError, [@searcher, @searcher_options.join(" ")].join(" ")
401
+ end
402
+ }.compact
403
+ end
404
+
405
+ def find_searcher(program = nil)
406
+ candidates = [FAVORITE_SEARCHER]
407
+ candidates.unshift(program) unless program.nil? || candidates.include?(program)
408
+ search_paths = ENV["PATH"].split(":")
409
+ candidates.map { |prog|
410
+ find_in_paths(prog, search_paths)
411
+ }[0]
412
+ end
413
+
414
+ def find_in_paths(prog, paths)
415
+ paths.each { |p|
416
+ abspath = File.expand_path(prog, p)
417
+ return abspath if FileTest.exist?(abspath) && FileTest.executable?(abspath)
418
+ }
419
+ nil
420
+ end
222
421
  # :startdoc:
223
422
 
224
423
  end
@@ -43,13 +43,27 @@ module Textrepo
43
43
  def read(timestamp); []; end
44
44
 
45
45
  ##
46
- # Updates the content with text in the repository, which is
47
- # associated to the timestamp. Returns the timestamp.
46
+ # Updates the content with given text in the repository, which is
47
+ # associated to the given Timestamp object. Returns the Timestamp
48
+ # newly generated during the execution.
49
+ #
50
+ # When true is passed as the third argument, keeps the Timestamp
51
+ # unchanged, though updates the content. Then, returns the given
52
+ # Timestamp object.
53
+ #
54
+ # If the given Timestamp object does not exist as a Timestamp
55
+ # attached to text in the repository, raises
56
+ # MissingTimestampError.
57
+ #
58
+ # If the given text is empty, raises EmptyTextError.
59
+ #
60
+ # If the given text is identical to the text in the repository,
61
+ # does nothing. Returns the given timestamp itself.
48
62
  #
49
63
  # :call-seq:
50
- # update(Timestamp, Array) -> Timestamp
64
+ # update(Timestamp, Array, true or false) -> Timestamp
51
65
 
52
- def update(timestamp, text); timestamp; end
66
+ def update(timestamp, text, keep_stamp = false); timestamp; end
53
67
 
54
68
  ##
55
69
  # Deletes the content in the repository, which is associated to
@@ -92,6 +106,25 @@ module Textrepo
92
106
  # exist?(Timestamp) -> true or false
93
107
 
94
108
  def exist?(timestamp); false; end
109
+
110
+ ##
111
+ # Searches a pattern (word or regular expression) in the texts that
111
+ # match a given timestamp pattern. Returns an Array of
113
+ # search results. If no match, returns an empty Array.
114
+ #
115
+ # See the document for Repository#entries about a timestamp
116
+ # pattern. When nil is passed as a timestamp pattern, searching
117
+ # applies to all text in the repository.
118
+ #
119
+ # Each entry of the result Array is constructed from 3 items, (1)
120
+ # timestamp (Timestamp), (2) line number (Integer), (3) matched
121
+ # line (String).
122
+ #
123
+ # :call-seq:
124
+ # search(String for pattern, String for Timestamp pattern) -> Array
125
+
126
+ def search(pattern, stamp_pattern = nil); []; end
127
+
95
128
  end
96
129
 
97
130
  require_relative 'file_system_repository'
@@ -98,7 +98,14 @@ module Textrepo
98
98
  ye, mo, da, ho, mi, se, sfx = split_stamp(stamp_str).map(&:to_i)
99
99
  Timestamp.new(Time.new(ye, mo, da, ho, mi, se), sfx)
100
100
  rescue InvalidTimestampStringError, ArgumentError => _
101
- raise InvalidTimestampStringError, stamp_str
101
+ emsg = if stamp_str.nil?
102
+ "(nil)"
103
+ elsif stamp_str.empty?
104
+ "(empty string)"
105
+ else
106
+ stamp_str
107
+ end
108
+ raise InvalidTimestampStringError, emsg
102
109
  end
103
110
  end
104
111
 
@@ -1,3 +1,3 @@
1
1
  module Textrepo
2
- VERSION = '0.4.5'
2
+ VERSION = '0.5.4'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textrepo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.5.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - mnbi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-30 00:00:00.000000000 Z
11
+ date: 2020-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler