textrepo 0.4.4 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7415dcbc8ec9483a33c0027432a60473fd0b77ad67369b25b88fb0f399edd788
4
- data.tar.gz: 9467336f5d93ec578d5693005a4737f4eef55aeb11e4c0a4ec12f197e81de240
3
+ metadata.gz: b8a737ed78449fe9562b18fa9a1e6f5a06e8adb7e460dd5fd35ac9f9a5127e6e
4
+ data.tar.gz: af06e6e6e6c004253452a8c8871de26b93e800ac090c0cf3d1c446ea498bd386
5
5
  SHA512:
6
- metadata.gz: f989032a0f1db66aeb1392cf9b05fd89306152d1b76907953c90ec14839d7a34465a017a2be8017536270b9a071cd3027497305978fdf59681d3547c8a68a5e5
7
- data.tar.gz: cfb126c03ce9720dd2f2e90453a5d2baf33160c7786a8a1d9b5ef02b2c4a4e2531de5744d255efd7979a39e87c9e93287de3397533404143b51be1eb8bb23a58
6
+ metadata.gz: e433bbb36d848f98c0f75999fecd01b29e69e9767e1d11d32897e78b50394c2787087deed5718e2266d81390736435d25e48285b47087179d64411a8a56ad21a
7
+ data.tar.gz: 414bdb550479be8dbb7dff29569b04064cb1a40704de8fad3d36c90ffd9ffcf73b69b4a651987245a481878e27a62e8251399ede6f81b5fb4c16c173a76da4e3
@@ -7,6 +7,29 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
7
7
  ## [Unreleased]
8
8
  Nothing to record here.
9
9
 
10
+ ## [0.5.3] - 2020-11-03
11
+ ### Changed
12
+ - Fix issue #38: fix typo in code for FileSystemRepository.
13
+
14
+ ## [0.5.2] - 2020-11-03
15
+ ### Changed
16
+ - Fix issue #34:
17
+ - fix FileSystemRepository#entries to accept "yyyymo" pattern as a
18
+ Timestamp pattern.
19
+ - Fix issue #33: fix typo in the doc for FileSystemRepository.new.
20
+ - Fix issue #31: unfriendly error message of Timestamp.parse_s.
21
+
22
+ ## [0.5.1] - 2020-11-02
23
+ ### Changed
24
+ - Fix issue #28.
25
+ - Modify `Repository#update` to do nothing when the given text is
26
+ identical to the one in the repository.
27
+
28
+ ## [0.5.0] - 2020-11-01
29
+ ### Added
30
+ - Add a new API `Repository#search`.
31
+ - Add a new API `Repository#exist?`. (0.4.3)
32
+
10
33
  ## [0.4.0] - 2020-10-14
11
34
  ### Added
12
35
  - Released to rubygems.org.
@@ -1,12 +1,39 @@
1
1
  module Textrepo
2
+
3
+ ##
4
+ # Following errors might occur in repository operations:
5
+ # +---------------------------------+-----------------------+
6
+ # | operation (args) | error type |
7
+ # +---------------------------------+-----------------------+
8
+ # | create (timestamp, text) | Duplicate timestamp |
9
+ # | | Empty text |
10
+ # +---------------------------------+-----------------------+
11
+ # | read (timestamp) | Missing timestamp |
12
+ # +---------------------------------+-----------------------+
13
+ # | update (timestamp, text) | Missing timestamp |
14
+ # | | Empty text |
15
+ # +---------------------------------+-----------------------+
16
+ # | delete (timestamp) | Missing timestamp |
17
+ # +---------------------------------+-----------------------+
18
+ # | search (pattern, stamp_pattern) | Invalid search result |
19
+ # +---------------------------------+-----------------------+
20
+
2
21
  class Error < StandardError; end
3
22
 
23
+ # :stopdoc:
4
24
  module ErrMsg
5
25
  UNKNOWN_REPO_TYPE = 'unknown type for repository: %s'
6
26
  DUPLICATE_TIMESTAMP = 'duplicate timestamp: %s'
7
27
  EMPTY_TEXT = 'empty text'
8
28
  MISSING_TIMESTAMP = 'missing timestamp: %s'
29
+ INVALID_TIMESTAMP_STRING = "invalid string as timestamp: %s"
30
+ INVALID_SEARCH_RESULT = "invalid result by searcher: %s"
9
31
  end
32
+ # :startdoc:
33
+
34
+ ##
35
+ # An error raised if unknown type was specified as the repository
36
+ # type.
10
37
 
11
38
  class UnknownRepoTypeError < Error
12
39
  def initialize(type)
@@ -14,20 +41,9 @@ module Textrepo
14
41
  end
15
42
  end
16
43
 
17
- # Following errors might occur in repository operations:
18
- # +--------------------------+---------------------+
19
- # | operation (args) | error type |
20
- # +--------------------------+---------------------+
21
- # | create (timestamp, text) | Duplicate timestamp |
22
- # | | Empty text |
23
- # +--------------------------+---------------------+
24
- # | read (timestamp) | Missing timestamp |
25
- # +--------------------------+---------------------+
26
- # | update (timestamp, text) | Mssing timestamp |
27
- # | | Empty text |
28
- # +--------------------------+---------------------+
29
- # | delete (timestamp) | Missing timestamp |
30
- # +--------------------------+---------------------+
44
+ ##
45
+ # An error raised if the specified timestamp already exists in
46
+ # the repository.
31
47
 
32
48
  class DuplicateTimestampError < Error
33
49
  def initialize(timestamp)
@@ -35,16 +51,43 @@ module Textrepo
35
51
  end
36
52
  end
37
53
 
54
+ ##
55
+ # An error raised if the given text is empty.
56
+
38
57
  class EmptyTextError < Error
39
58
  def initialize
40
59
  super(ErrMsg::EMPTY_TEXT)
41
60
  end
42
61
  end
43
62
 
63
+ ##
64
+ # An error raised if the given timestamp does not exist in the
65
+ # repository.
66
+
44
67
  class MissingTimestampError < Error
45
68
  def initialize(timestamp)
46
69
  super(ErrMsg::MISSING_TIMESTAMP % timestamp)
47
70
  end
48
71
  end
49
72
 
73
+ ##
74
+ # An error raised if an argument cannot be converted into a
75
+ # Textrepo::Timestamp object.
76
+
77
+ class InvalidTimestampStringError < Error
78
+ def initialize(str)
79
+ super(ErrMsg::INVALID_TIMESTAMP_STRING % str)
80
+ end
81
+ end
82
+
83
+ ##
84
+ # An error raised if the search result is not suitable to use.
85
+ #
86
+
87
+ class InvalidSearchResultError < Error
88
+ def initialize(str)
89
+ super(ErrMsg::INVALID_SEARCH_RESULT % str)
90
+ end
91
+ end
92
+
50
93
  end
@@ -1,4 +1,5 @@
1
1
  require 'fileutils'
2
+ require "open3"
2
3
 
3
4
  module Textrepo
4
5
 
@@ -19,6 +20,16 @@ module Textrepo
19
20
 
20
21
  attr_reader :extname
21
22
 
23
+ ##
24
+ # Searcher program name.
25
+
26
+ attr_reader :searcher
27
+
28
+ ##
29
+ # An array of options to pass to the searcher program.
30
+
31
+ attr_reader :searcher_options
32
+
22
33
  ##
23
34
  # Default name for the repository which is used when no name is
24
35
  # specified in the configuration settings.
@@ -31,6 +42,11 @@ module Textrepo
31
42
 
32
43
  FAVORITE_EXTNAME = 'md'
33
44
 
45
+ ##
46
+ # Default searcher program to search text in the repository.
47
+
48
+ FAVORITE_SEARCHER = 'grep'
49
+
34
50
  ##
35
51
  # Creates a new repository object. The argument, `conf` must be a
36
52
  # Hash object. It should hold the following values:
@@ -41,15 +57,33 @@ module Textrepo
41
57
  # - OPTIONAL: (if not specified, default values are used)
42
58
  # - :repository_name => basename of the root path for the repository
43
59
  # - :default_extname => extname for a file stored in the repository
60
+ # - :searcher => a program to search like `grep`
61
+ # - :searcher_options => an Array of options to pass to the searcher
44
62
  #
45
63
  # The root path of the repository looks like the following:
46
64
  # - conf[:repository_base]/conf[:repository_name]
47
65
  #
48
- # Default values are set when `repository_name` and `default_extname`
66
+ # Default values are set when `:repository_name` and `:default_extname`
49
67
  # were not defined in `conf`.
50
68
  #
69
+ # Be careful when setting `:searcher_options`; it must make the
70
+ # searcher behavior equivalent to `grep` with "-inRE". The
71
+ # default values for the searcher options are defined for BSD grep
72
+ # (default grep on macOS), GNU grep, and ripgrep (aka rg). They
73
+ # are:
74
+ #
75
+ # "grep" => ["-i", "-n", "-R", "-E"]
76
+ # "egrep" => ["-i", "-n", "-R"]
77
+ # "ggrep" => ["-i", "-n", "-R", "-E"]
78
+ # "gegrep" => ["-i", "-n", "-R"]
79
+ # "rg" => ["-S", "-n", "--no-heading", "--color", "never"]
80
+ #
81
+ # If you use one of those searchers, it is not recommended to set
82
+ # `:searcher_options`. The default value works well in
83
+ # `textrepo`.
84
+ #
51
85
  # :call-seq:
52
- # new(Rbnotes::Conf or Hash) -> FileSystemRepository
86
+ # new(Hash or Hash like object) -> FileSystemRepository
53
87
 
54
88
  def initialize(conf)
55
89
  super
@@ -58,6 +92,8 @@ module Textrepo
58
92
  @path = File.expand_path("#{name}", base)
59
93
  FileUtils.mkdir_p(@path)
60
94
  @extname = conf[:default_extname] || FAVORITE_EXTNAME
95
+ @searcher = find_searcher(conf[:searcher])
96
+ @searcher_options = conf[:searcher_options]
61
97
  end
62
98
 
63
99
  ##
@@ -97,21 +133,25 @@ module Textrepo
97
133
  # Updates the file content in the repository. A new timestamp
98
134
  # will be attached to the text.
99
135
  #
136
+ # See the documentation of Repository#update to know about errors
137
+ # and constraints of this method.
138
+ #
100
139
  # :call-seq:
101
140
  # update(Timestamp, Array) -> Timestamp
102
141
 
103
142
  def update(timestamp, text)
104
143
  raise EmptyTextError if text.empty?
105
- org_abs = abspath(timestamp)
106
- raise MissingTimestampError, timestamp unless FileTest.exist?(org_abs)
144
+ raise MissingTimestampError, timestamp unless exist?(timestamp)
145
+
146
+ # does nothing if the given text is the same as the one in the repository
147
+ return timestamp if read(timestamp) == text
107
148
 
108
149
  # the text must be stored with the new timestamp
109
150
  new_stamp = Timestamp.new(Time.now)
110
- new_abs = abspath(new_stamp)
111
- write_text(new_abs, text)
151
+ write_text(abspath(new_stamp), text)
112
152
 
113
- # delete the original file in the repository
114
- FileUtils.remove_file(org_abs)
153
+ # delete the original text file in the repository
154
+ FileUtils.remove_file(abspath(timestamp))
115
155
 
116
156
  new_stamp
117
157
  end
@@ -147,7 +187,7 @@ module Textrepo
147
187
  if exist?(stamp)
148
188
  results << stamp
149
189
  end
150
- when 0, "yyyymoddhhmiss".size, "yyyymodd".size
190
+ when 0, "yyyymoddhhmiss".size, "yyyymodd".size, "yyyymo".size
151
191
  results += find_entries(stamp_pattern)
152
192
  when 4 # "yyyy" or "modd"
153
193
  pat = nil
@@ -179,6 +219,27 @@ module Textrepo
179
219
  FileTest.exist?(abspath(timestamp))
180
220
  end
181
221
 
222
+ ##
223
+ # Searches a pattern in all text. The given pattern is a word to
224
+ # search or a regular expression. The pattern would be passed to
225
+ # a searcher program as it is.
226
+ #
227
+ # See the document for Textrepo::Repository#search to know about
228
+ # the search result.
229
+ #
230
+ # :call-seq:
231
+ # search(String for pattern, String for Timestamp pattern) -> Array
232
+
233
+ def search(pattern, stamp_pattern = nil)
234
+ result = nil
235
+ if stamp_pattern.nil?
236
+ result = invoke_searcher_at_repo_root(@searcher, pattern)
237
+ else
238
+ result = invoke_searcher_for_entries(@searcher, pattern, entries(stamp_pattern))
239
+ end
240
+ construct_search_result(result)
241
+ end
242
+
182
243
  # :stopdoc:
183
244
 
184
245
  private
@@ -208,9 +269,154 @@ module Textrepo
208
269
 
209
270
  def find_entries(stamp_pattern)
210
271
  Dir.glob("#{@path}/**/#{stamp_pattern}*.#{@extname}").map { |e|
211
- Timestamp.parse_s(timestamp_str(e))
272
+ begin
273
+ Timestamp.parse_s(timestamp_str(e))
274
+ rescue InvalidTimestampStringError => _
275
+ # Just ignore the erroneous entry, since it is not a text in
276
+ # the repository. It may be a garbage, or some kind of
277
+ # hidden stuff of the repository, ... etc.
278
+ nil
279
+ end
280
+ }.compact
281
+ end
282
+
283
+ ##
284
+ # The upper limit of files to search at one time. The value was
285
+ # chosen without a strict rationale; it seems neither too many nor
286
+ # too few files to handle in one search process.
287
+
288
+ LIMIT_OF_FILES = 20
289
+
290
+ ##
291
+ # When no timestamp pattern was given, invoke the searcher with
292
+ # the repository root path as its argument and the recursive
293
+ # searching option. The search could be done in only one process.
294
+
295
+ def invoke_searcher_at_repo_root(searcher, pattern)
296
+ o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
297
+ pattern, @path)
298
+ output = []
299
+ output += o.lines.map(&:chomp) if s.success? && (! o.empty?)
300
+ output
301
+ end
302
+
303
+ ##
304
+ # When a timestamp pattern was given, at first, list target files,
305
+ # then invoke the searcher for those files. Since the number of
306
+ # target files may be so much, it seems to be dangerous to pass
307
+ # all of them to a single search process at one time.
308
+ #
309
+ # One more thing to mention, the searcher, like `grep`, does not
310
+ # add the filename at the beginning of the search result line, if
311
+ # the target is one file. This behavior is not suitable in this
312
+ # purpose. The code below adds the filename when the target is
313
+ # one file.
314
+
315
def invoke_searcher_for_entries(searcher, pattern, entries)
  # With no target file the searcher would be started without any file
  # operand. A recursive grep (or rg) then searches the current working
  # directory instead, so return early with an empty result.
  return [] if entries.empty?

  options = find_searcher_options(searcher)

  # Feed the searcher at most LIMIT_OF_FILES files per process and
  # concatenate the outputs in the order of the given entries.
  entries.each_slice(LIMIT_OF_FILES).flat_map do |batch|
    files = find_files(batch)
    out, status = Open3.capture2(searcher, *options, pattern, *files)
    next [] unless status.success? && !out.empty?

    lines = out.lines.map(&:chomp)
    if files.size == 1
      # A searcher like `grep` omits the filename when it is given only
      # one file; add it so that every line has the same layout.
      lines.map { |line| [files.first, line].join(":") }
    else
      lines
    end
  end
end
348
+
349
+ SEARCHER_OPTS = {
350
+ # case insensitive, print line number, recursive search, work as egrep
351
+ "grep" => ["-i", "-n", "-R", "-E"],
352
+ # case insensitive, print line number, recursive search
353
+ "egrep" => ["-i", "-n", "-R"],
354
+ # case insensitive, print line number, recursive search, work as gegrep
355
+ "ggrep" => ["-i", "-n", "-R", "-E"],
356
+ # case insensitive, print line number, recursive search
357
+ "gegrep" => ["-i", "-n", "-R"],
358
+ # smart case, print line number, no color
359
+ "rg" => ["-S", "-n", "--no-heading", "--color", "never"],
360
+ }
361
+
362
##
# Returns the Array of command line options to pass to the searcher.
#
# The options given in the configuration (`@searcher_options`) take
# precedence. Otherwise the defaults registered in SEARCHER_OPTS
# under the basename of `searcher` are used. An unknown searcher
# gets an empty Array, so splatting the result into a command line
# adds no argument at all (an empty String would be splatted into
# one empty argument).

def find_searcher_options(searcher)
  @searcher_options || SEARCHER_OPTS[File.basename(searcher)] || []
end
365
+
366
+ def find_files(timestamps)
367
+ timestamps.map{|stamp| abspath(stamp)}
368
+ end
369
+
370
+ ##
371
+ # The argument must be an Array containing the searcher output.
372
+ # Each item is constructed from 3 parts:
373
+ # "<pathname>:<integer>:<text>"
374
+ #
375
+ # For example, it may look like:
376
+ #
377
+ # "/somewhere/2020/11/20201101044300.md:18:foo is foo"
378
+ #
379
+ # Or it may contain more ":" in the text part as:
380
+ #
381
+ # "/somewhere/2020/11/20201101044500.md:119:apple:orange:grape"
382
+ #
383
+ # In the latter case, `split(":")` will split it too much. That is,
384
+ # the result will be:
385
+ #
386
+ # ["/somewhere/2020/11/20201101044500.md", "119", "apple", "orange", "grape"]
387
+ #
388
+ # Text part must be joined with ":".
389
+
390
def construct_search_result(output)
  output.map { |line|
    # Split into at most 3 fields so that any ":" inside the matched
    # text (including trailing ones) is kept as it is.
    pathname, num, match_text = line.split(":", 3)
    begin
      [Timestamp.parse_s(timestamp_str(pathname)), num.to_i, match_text.to_s]
    rescue InvalidTimestampStringError, TypeError
      # The line does not start with a path of a text in the
      # repository. Report the searcher command; `@searcher_options`
      # is nil unless it was given in the configuration.
      raise InvalidSearchResultError,
            [@searcher, *Array(@searcher_options)].compact.join(" ")
    end
  }
end
402
+
403
##
# Finds the absolute path of the searcher program to use.
#
# The given `program` (if any) is looked up first in the directories
# listed in the PATH environment variable. When it is not given or
# cannot be found, FAVORITE_SEARCHER is tried as a fallback. Returns
# nil if no candidate is found.

def find_searcher(program = nil)
  candidates = [program, FAVORITE_SEARCHER].compact.uniq
  # PATH may be unset; use the platform's separator rather than ":".
  search_paths = ENV.fetch("PATH", "").split(File::PATH_SEPARATOR)
  candidates.each do |prog|
    found = find_in_paths(prog, search_paths)
    return found if found
  end
  nil
end
411
+
412
##
# Looks for an executable file named `prog` in each directory of
# `paths`, in order. Returns the absolute path of the first one
# found, or nil when none of the directories holds it.

def find_in_paths(prog, paths)
  paths.each do |dir|
    candidate = File.expand_path(prog, dir)
    # `executable?` is also true for a searchable directory, so make
    # sure the candidate is a regular file.
    return candidate if File.file?(candidate) && File.executable?(candidate)
  end
  nil
end
419
+ # :startdoc:
214
420
 
215
421
  end
216
422
  end
@@ -43,8 +43,17 @@ module Textrepo
43
43
  def read(timestamp); []; end
44
44
 
45
45
  ##
46
- # Updates the content with text in the repository, which is
47
- # associated to the timestamp. Returns the timestamp.
46
+ # Updates the content with given text in the repository, which is
47
+ # associated to the given timestamp. Returns the timestamp newly
48
+ # generated during the execution.
49
+ #
50
+ # If the given Timestamp does not exist as a Timestamp attached to
51
+ # text in the repository, raises MissingTimestampError.
52
+ #
53
+ # If the given text is empty, raises EmptyTextError.
54
+ #
55
+ # If the given text is identical to the text in the repository,
56
+ # does nothing. Returns the given timestamp itself.
48
57
  #
49
58
  # :call-seq:
50
59
  # update(Timestamp, Array) -> Timestamp
@@ -92,6 +101,25 @@ module Textrepo
92
101
  # exist?(Timestamp) -> true or false
93
102
 
94
103
  def exist?(timestamp); false; end
104
+
105
+ ##
106
+ # Searches for a pattern (word or regular expression) in the texts
107
+ # that match a given timestamp pattern. Returns an Array of
108
+ # search results. If no match, returns an empty Array.
109
+ #
110
+ # See the document for Repository#entries about a timestamp
111
+ # pattern. When nil is passed as a timestamp pattern, searching
112
+ # applies to all text in the repository.
113
+ #
114
+ # Each entry of the result Array is constructed from 3 items, (1)
115
+ # timestamp (Timestamp), (2) line number (Integer), (3) matched
116
+ # line (String).
117
+ #
118
+ # :call-seq:
119
+ # search(String for pattern, String for Timestamp pattern) -> Array
120
+
121
+ def search(pattern, stamp_pattern = nil); []; end
122
+
95
123
  end
96
124
 
97
125
  require_relative 'file_system_repository'
@@ -72,8 +72,11 @@ module Textrepo
72
72
  # yyyymoddhhmiss sfx yyyy mo dd hh mi ss sfx
73
73
  # "20201230123456" -> "2020", "12", "30", "12", "34", "56"
74
74
  # "20201230123456_789" -> "2020", "12", "30", "12", "34", "56", "789"
75
+ #
76
+ # Raises InvalidTimestampStringError if nil was passed as an argument.
75
77
 
76
78
  def split_stamp(stamp_str)
79
+ raise InvalidTimestampStringError, stamp_str if stamp_str.nil?
77
80
  # yyyy mo dd hh mi ss sfx
78
81
  a = [0..3, 4..5, 6..7, 8..9, 10..11, 12..13, 15..17].map {|r| stamp_str[r]}
79
82
  a[-1].nil? ? a[0..-2] : a
@@ -83,13 +86,27 @@ module Textrepo
83
86
  # Generate a Timestamp object from a string which represents a
84
87
  # timestamp, such as "20201028163400".
85
88
  #
89
+ # Raises InvalidTimestampStringError if the argument cannot be
90
+ # converted into a Timestamp object.
91
+ #
86
92
  # :call-seq:
87
93
  # parse_s("20201028163400") -> Timestamp
88
94
  # parse_s("20201028163529_034") -> Timestamp
89
95
 
90
96
  def parse_s(stamp_str)
91
- year, mon, day, hour, min, sec , sfx = split_stamp(stamp_str).map(&:to_i)
92
- Timestamp.new(Time.new(year, mon, day, hour, min, sec), sfx)
97
+ begin
98
+ ye, mo, da, ho, mi, se, sfx = split_stamp(stamp_str).map(&:to_i)
99
+ Timestamp.new(Time.new(ye, mo, da, ho, mi, se), sfx)
100
+ rescue InvalidTimestampStringError, ArgumentError => _
101
+ emsg = if stamp_str.nil?
102
+ "(nil)"
103
+ elsif stamp_str.empty?
104
+ "(empty string)"
105
+ else
106
+ stamp_str
107
+ end
108
+ raise InvalidTimestampStringError, emsg
109
+ end
93
110
  end
94
111
 
95
112
  end
@@ -1,3 +1,3 @@
1
1
  module Textrepo
2
- VERSION = '0.4.4'
2
+ VERSION = '0.5.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: textrepo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.4
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - mnbi
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-28 00:00:00.000000000 Z
11
+ date: 2020-11-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler