RubyGems - textrepo - Versions diffs - 0.4.5 → 0.5.0 - Mend

textrepo 0.4.5 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/lib/textrepo/error.rb +26 -13
data/lib/textrepo/file_system_repository.rb +194 -2
data/lib/textrepo/repository.rb +19 -0
data/lib/textrepo/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 24594803cdcc722b7a9ca01648a9d8955cb9637669ab1c459e653c2eb2d2c199
-  data.tar.gz: bc5802a9a1df190d2278163b25d5ff1d2d107223acec17a3e713d1daf30a2218
+  metadata.gz: dc5cf6089b4883c93dc228e19aa0a149d71a8d9cc26a92e6c75fdc4ee0b2d694
+  data.tar.gz: 8ebecace02d486b6b6c12256d52adfd44963a3a14f6d39729b83d426bce3a26e
 SHA512:
-  metadata.gz: 21ef95e13e30d816b671be203b7beffa1ab022aec94bfc9540a7712f548cbf5749a8f5dc8f13e1f22067f53f52ac63dd2ac75ffa32719d72deb49b6b410c05a3
-  data.tar.gz: bef5bcfbef5a5557f80c44035dd42a9c8474cb04836e3328a6b0a7f0c065d5b601ff92bfcd4dee4a7a7c3273ac9e0648ae7a1e3bb52acdd95627de9266a04767
+  metadata.gz: aef27bf0363a66eeb1676ccda0682c253155875a8d0c16b27189674836edf2563b8df87b7f3b56cdb5d02c895f724f47a956588a5d97d24d83cdddce76fc083e
+  data.tar.gz: 81a4f8a76eb0ec8545e29ddbd537d49ec00f6009f5617dad61c61d8a5552c6df090d6d37fd507a7976c5f5f60b7d0c8b43f9a99d98d7efa1e933828270711d87

data/CHANGELOG.md CHANGED

@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/).
 ## [Unreleased]
 Nothing to record here.
+## [0.5.0] - 2020-11-01
+### Added
+- Add a new API `Repository#search`.
+- Add a new API `Repository#exist?`. (0.4.3)
 ## [0.4.0] - 2020-10-14
 ### Added
 - Released to rubygems.org.

data/lib/textrepo/error.rb CHANGED

@@ -2,19 +2,21 @@ module Textrepo
   ##
   # Following errors might occur in repository operations:
-  #   +--------------------------+---------------------+
-  #   | operation (args)         | error type          |
-  #   +--------------------------+---------------------+
-  #   | create (timestamp, text) | Duplicate timestamp |
-  #   |                          | Empty text          |
-  #   +--------------------------+---------------------+
-  #   | read   (timestamp)       | Missing timestamp   |
-  #   +--------------------------+---------------------+
-  #   | update (timestamp, text) | Mssing timestamp    |
-  #   |                          | Empty text          |
-  #   +--------------------------+---------------------+
-  #   | delete (timestamp)       | Missing timestamp   |
-  #   +--------------------------+---------------------+
+  #   +---------------------------------+-----------------------+
+  #   | operation (args)                | error type            |
+  #   +---------------------------------+-----------------------+
+  #   | create (timestamp, text)        | Duplicate timestamp   |
+  #   |                                 | Empty text            |
+  #   +---------------------------------+-----------------------+
+  #   | read   (timestamp)              | Missing timestamp     |
+  #   +---------------------------------+-----------------------+
+  #   | update (timestamp, text)        | Missing timestamp     |
+  #   |                                 | Empty text            |
+  #   +---------------------------------+-----------------------+
+  #   | delete (timestamp)              | Missing timestamp     |
+  #   +---------------------------------+-----------------------+
+  #   | search (pattern, stamp_pattern) | Invalid search result |
+  #   +---------------------------------+-----------------------+
   class Error < StandardError; end
@@ -25,6 +27,7 @@ module Textrepo
     EMPTY_TEXT          = 'empty text'
     MISSING_TIMESTAMP   = 'missing timestamp: %s'
     INVALID_TIMESTAMP_STRING = "invalid string as timestamp: %s"
+    INVALID_SEARCH_RESULT = "invalid result by searcher: %s"
   end
   # :startdoc:
@@ -77,4 +80,14 @@ module Textrepo
     end
   end
+  ##
+  # An error raise if the search result is not suitable to use.
+  #
+  class InvalidSearchResultError < Error
+    def initialize(str)
+      super(ErrMsg::INVALID_SEARCH_RESULT % str)
+    end
+  end
 end

data/lib/textrepo/file_system_repository.rb CHANGED

@@ -1,4 +1,5 @@
 require 'fileutils'
+require "open3"
 module Textrepo
@@ -19,6 +20,16 @@ module Textrepo
     attr_reader :extname
+    ##
+    # Searcher program name.
+    attr_reader :searcher
+    ##
+    # An array of options to pass to the searcher program.
+    attr_reader :searcher_options
     ##
     # Default name for the repository which uses when no name is
     # specified in the configuration settings.
@@ -31,6 +42,11 @@ module Textrepo
     FAVORITE_EXTNAME = 'md'
+    ##
+    # Default searcher program to search text in the repository.
+    FAVORITE_SEARCHER = 'grep'
     ##
     # Creates a new repository object.  The argument, `conf` must be a
     # Hash object.  It should hold the following values:
@@ -41,15 +57,32 @@ module Textrepo
     # - OPTIONAL: (if not specified, default values are used)
     #   - :repository_name => basename of the root path for the repository
     #   - :default_extname => extname for a file stored into in the repository
+    #   - :searcher => a program to search like `grep`
+    #   - :searcher_options => an Array of options to pass to the searcher
     #
     # The root path of the repository looks like the following:
     # - conf[:repository_base]/conf[:repository_name]
     #
-    # Default values are set when `repository_name` and `default_extname`
+    # Default values are set when `:repository_name` and `:default_extname`
     # were not defined in `conf`.
     #
+    # Be careful when setting `:searcher_options`: the options must
+    # make the searcher behave like `grep` with "-inR".  Default
+    # options are defined for BSD grep (the default grep on macOS),
+    # GNU grep, and ripgrep (aka rg).  They are:
+    #
+    #   "grep"   => ["-i", "-n", "-R", "-E"]
+    #   "egrep"  => ["-i", "-n", "-R"]
+    #   "ggrep"  => ["-i", "-n", "-R", "-E"]
+    #   "gegrep" => ["-i", "-n", "-R"]
+    #   "rg"     => ["-S", "-n", "--no-heading", "--color", "never"]
+    #
+    # If you use one of those searchers, setting `:searcher_options`
+    # is not recommended.  The default values work well in
+    # `textrepo`.
+    #
     # :call-seq:
-    #     new(Rbnotes::Conf or Hash) -> FileSystemRepository
+    #     new(Hash or Hash like object) -> FileSystemRepository
     def initialize(conf)
       super
@@ -58,6 +91,8 @@ module Textrepo
       @path = File.expand_path("#{name}", base)
       FileUtils.mkdir_p(@path)
       @extname = conf[:default_extname] || FAVORITE_EXTNAME
+      @searcher = find_searcher(conf[:searcher])
+      @searcher_options = conf[:searcher_options]
     end
     ##
@@ -179,6 +214,27 @@ module Textrepo
       FileTest.exist?(abspath(timestamp))
     end
+    ##
+    # Searches a pattern in all text.  The given pattern is a word to
+    # search or a regular expression.  The pattern would be passed to
+    # a searcher program as it passed.
+    #
+    # See the document for Textrepo::Repository#search to know about
+    # the search result.
+    #
+    # :call-seq:
+    #     search(String for pattern, String for Timestamp pattern) -> Array
+    def search(pattern, stamp_pattern = nil)
+      result = nil
+      if stamp_pattern.nil?
+        result = invoke_searcher_at_repo_root(@searcher, pattern)
+      else
+        result = invoke_searcher_for_entries(@searcher, pattern, entries(stamp_pattern))
+      end
+      construct_search_result(result)
+    end
     # :stopdoc:
     private
@@ -219,6 +275,142 @@ module Textrepo
       }.compact
     end
+    ##
+    # The upper limit of files to search at one time.  The value was
+    # chosen without a particular reason; it just seems to be neither
+    # too many nor too few files to handle in one search process.
+    LIMIT_OF_FILES = 20
+    ##
+    # When no timestamp pattern was given, invoke the searcher with
+    # the repository root path as its argument and the recursive
+    # searching option.  The search could be done in only one process.
+    def invoke_searcher_at_repo_root(searcher, pattern)
+      o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
+                            pattern, @path)
+      output = []
+      output += o.lines.map(&:chomp) if s.success? && (! o.empty?)
+      output
+    end
+    ##
+    # When a timestamp pattern was given, at first, list target files,
+    # then invoke the searcher for those files.  Since the number of
+    # target files may be so much, it seems to be dangerous to pass
+    # all of them to a single search process at one time.
+    #
+    # One more thing to mention, the searcher, like `grep`, does not
+    # add the filename at the beginning of the search result line, if
+    # the target is one file.  This behavior is not suitable in this
+    # purpose.  The code below adds the filename when the target is
+    # one file.
+    def invoke_searcher_for_entries(searcher, pattern, entries)
+      output = []
+      num_of_entries = entries.size
+      if num_of_entries == 1
+        # If the search taget is one file, the output needs special
+        # treatment.
+        file = abspath(entries[0])
+        o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
+                              pattern, file)
+        if s.success? && (! o.empty)
+          output += o.lines.map { |line|
+            # add filename at the beginning of the search result line
+            [file, line.chomp].join(":")
+          }
+        end
+      elsif num_of_entries > LIMIT_OF_FILES
+        output += invoke_searcher_for_entries(searcher, pattern, entries[0..(LIMIT_OF_FILES - 1)])
+        output += invoke_searcher_for_entries(searcher, pattern, entries[LIMIT_OF_FILES..-1])
+      else
+        # When the number of target is less than the upper limit,
+        # invoke the searcher with all of target files as its
+        # arguments.
+        files = find_files(entries)
+        o, s = Open3.capture2(searcher, *find_searcher_options(searcher),
+                              pattern, *files)
+        if s.success? && (! o.empty)
+          output += o.lines.map(&:chomp)
+        end
+      end
+      output
+    end
+    SEARCHER_OPTS = {
+      # case insensitive, print line number, recursive search, work as egrep
+      "grep"   => ["-i", "-n", "-R", "-E"],
+      # case insensitive, print line number, recursive search
+      "egrep"  => ["-i", "-n", "-R"],
+      # case insensitive, print line number, recursive search, work as gegrep
+      "ggrep"  => ["-i", "-n", "-R", "-E"],
+      # case insensitive, print line number, recursive search
+      "gegrep" => ["-i", "-n", "-R"],
+      # smart case, print line number, no color
+      "rg"     => ["-S", "-n", "--no-heading", "--color", "never"],
+    }
+    def find_searcher_options(searcher)
+      @searcher_options || SEARCHER_OPTS[File.basename(searcher)] || ""
+    end
+    def find_files(timestamps)
+      timestamps.map{|stamp| abspath(stamp)}
+    end
+    ##
+    # The argument must be an Array contains the searcher output.
+    # Each item is constructed from 3 parts:
+    #   "<pathname>:<integer>:<text>"
+    #
+    # For example, it may looks like:
+    #
+    #   "/somewhere/2020/11/20201101044300.md:18:foo is foo"
+    #
+    # Or it may contains more ":" in the text part as:
+    #
+    #   "/somewhere/2020/11/20201101044500.md:119:apple:orange:grape"
+    #
+    # In the latter case, `split(":")` will split it too much.  That is,
+    # the result will be:
+    #
+    #  ["/somewhere/2020/11/20201101044500.md", "119", "apple", "orange", "grape"]
+    #
+    # Text part must be joined with ":".
+    def construct_search_result(output)
+      output.map { |line|
+        begin
+          pathname, num, *match_text = line.split(":")
+          [Timestamp.parse_s(timestamp_str(pathname)),
+           num.to_i,
+           match_text.join(":")]
+        rescue InvalidTimestampStringError, TypeError => _
+          raise InvalidSearchResultError, [@searcher, @searcher_options.join(" ")].join(" ")
+        end
+      }.compact
+    end
+    def find_searcher(program = nil)
+      candidates = [FAVORITE_SEARCHER]
+      candidates.unshift(program) unless program.nil? || candidates.include?(program)
+      search_paths = ENV["PATH"].split(":")
+      candidates.map { |prog|
+        find_in_paths(prog, search_paths)
+      }[0]
+    end
+    def find_in_paths(prog, paths)
+      paths.each { |p|
+        abspath = File.expand_path(prog, p)
+        return abspath if FileTest.exist?(abspath) && FileTest.executable?(abspath)
+      }
+      nil
+    end
     # :startdoc:
   end

data/lib/textrepo/repository.rb CHANGED

@@ -92,6 +92,25 @@ module Textrepo
     #     exist?(Timestamp) -> true or false
     def exist?(timestamp); false; end
+    ##
+    # Searches a pattern (word or regular expression) in text those
+    # matches to a given timestamp pattern.  Returns an Array of
+    # search results.  If no match, returns an empty Array.
+    #
+    # See the document for Repository#entries about a timestamp
+    # pattern.  When nil is passed as a timestamp pattern, searching
+    # applies to all text in the repository.
+    #
+    # Each entry of the result Array is constructed from 3 items, (1)
+    # timestamp (Timestamp), (2) line number (Integer), (3) matched
+    # line (String).
+    #
+    # :call-seq:
+    #     search(String for pattern, String for Timestamp pattern) -> Array
+    def search(pattern, stamp_pattern = nil); []; end
   end
   require_relative 'file_system_repository'

data/lib/textrepo/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Textrepo
-  VERSION = '0.4.5'
+  VERSION = '0.5.0'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: textrepo
 version: !ruby/object:Gem::Version
-  version: 0.4.5
+  version: 0.5.0
 platform: ruby
 authors:
 - mnbi
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-10-30 00:00:00.000000000 Z
+date: 2020-11-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler