RubyGems - lucarecord - Versions diffs - 0.2.13 - Mend

lucarecord 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

data/lib/luca_record.rb ADDED

@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+require 'luca_record/version'
+# Top-level namespace of the gem.
+# Base and Dict are registered with autoload, so their files are only
+# required the first time the constant is referenced.
+module LucaRecord
+  autoload :Base, 'luca_record/base'
+  autoload :Dict, 'luca_record/dict'
+end

data/lib/luca_record/base.rb ADDED

@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+require 'luca_record/version'
+require 'luca_record/io'
+require 'luca_support'
+module LucaRecord
+  # Base class for record models: it only mixes in the file based
+  # persistence API (LucaRecord::IO) and the LucaSupport::View helpers.
+  class Base
+    include LucaRecord::IO
+    include LucaSupport::View
+  end
+end

data/lib/luca_record/dict.rb ADDED

@@ -0,0 +1,146 @@
+# frozen_string_literal: true
+require 'csv'
+require 'fileutils'
+require 'yaml'
+require 'pathname'
+require 'luca_support'
+#
+# Low level API
+#
+module LucaRecord
+  class Dict
+    include LucaSupport::Code
+    # Remember the dictionary file name and load its 'config' and
+    # 'definitions' sections through set_driver.
+    #
+    # NOTE(review): the default value `@filename` is evaluated on the new
+    # instance, so it is nil unless a subclass assigns it before calling
+    # super - confirm whether a class level default was intended.
+    def initialize(file = @filename)
+      @path = file
+      #@path = dict_path(file)
+      set_driver
+    end
+    # Look up the best matching code for +word+.
+    #
+    # Returns the code with the highest match score when that score is
+    # greater than 0.4, otherwise +default_word+.
+    # +default_word+ is also returned when no candidate exists at all.
+    def search(word, default_word = nil)
+      code, score = max_score_code(word)
+      # max_score_code returns nil when there is nothing to compare against
+      return default_word if score.nil?
+      score > 0.4 ? code : default_word
+    end
+    #
+    # Build the column settings used for CSV/TSV conversion from @config.
+    #
+    # :label / :counter_label
+    #   :label is read first. :counter_label is only looked at when
+    #   'label' is set, and sets :type to 'single'.
+    # :debit_label / :credit_label
+    #   used when 'label' is absent. :credit_label is only looked at when
+    #   'debit_label' is set, and sets :type to 'double'.
+    # :type
+    #   'invalid' when neither pair above is complete.
+    # :debit_value, :credit_value, :note
+    #   copied as integers when present.
+    # :encoding, :year, :month, :day
+    #   copied unchanged when present.
+    #
+    def csv_config
+      config = {}
+      if @config['label']
+        config[:label] = @config['label'].to_i
+        # counter_label is copied as is, unlike the other labels
+        config.merge!(counter_label: @config['counter_label'], type: 'single') if @config['counter_label']
+      elsif @config['debit_label']
+        config[:debit_label] = @config['debit_label'].to_i
+        config.merge!(credit_label: @config['credit_label'].to_i, type: 'double') if @config['credit_label']
+      end
+      config[:type] ||= 'invalid'
+      %w[debit_value credit_value note].each do |key|
+        config[key.to_sym] = @config[key].to_i if @config[key]
+      end
+      %w[encoding year month day].each do |key|
+        config[key.to_sym] = @config[key] if @config[key]
+      end
+      config
+    end
+    #
+    # Read the CSV file at +path+ with a header row and yield every row.
+    # The file is read in the encoding given by @config['encoding']
+    # (utf-8 when unset) and transcoded to utf-8.
+    #
+    def load_csv(path)
+      source_encoding = @config.dig('encoding') || 'utf-8'
+      rows = CSV.read(path, headers: true, encoding: "#{source_encoding}:utf-8")
+      rows.each { |row| yield row }
+    end
+    #
+    # Load dictionary data, choosing the parser from the file extension.
+    #
+    # * .tsv / .csv are parsed by load_tsv_dict
+    # * .yaml / .yml are parsed by YAML.load_file
+    #
+    # Any other extension raises 'cannot load this filetype'.
+    # The file name is resolved through dict_path.
+    #
+    def self.load(file = @filename)
+      extension = File.extname(file)
+      if %w[.tsv .csv].include?(extension)
+        load_tsv_dict(dict_path(file))
+      elsif %w[.yaml .yml].include?(extension)
+        YAML.load_file(dict_path(file), **{})
+      else
+        raise 'cannot load this filetype'
+      end
+    end
+    #
+    # Generate a dictionary from a TSV file. The minimum assumption is as follows:
+    # the 1st row is a header row, and its names become symbol keys.
+    #
+    # * row[0] is 'code'. It becomes the key of the returned hash
+    # * row[1] is 'label'. Should be a human readable label
+    # * row[2] and later can hold app specific data
+    #
+    # Blank fields are left out of the entry.
+    #
+    def self.load_tsv_dict(path)
+      table = CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8')
+      table.each_with_object({}) do |row, dict|
+        dict[row[0]] = row.each_with_object({}) do |(header, field), entry|
+          # the column whose header is first in the row holds the code itself
+          next if row.index(header).zero? || field.nil?
+          entry[header.to_sym] = field
+        end
+      end
+    end
+    private
+    # Load the file named by @path and keep its 'config' and
+    # 'definitions' sections in @config and @definitions.
+    def set_driver
+      loaded = self.class.load(@path)
+      @config, @definitions = loaded.values_at('config', 'definitions')
+    end
+    # Path of +filename+ inside the 'dict' directory of the project
+    # directory (LucaSupport::Config::Pjdir).
+    # NOTE(review): the bare `private` above does not apply to `def self.`
+    # methods, so this stays publicly callable - confirm that is intended.
+    def self.dict_path(filename)
+      project_dir = Pathname(LucaSupport::Config::Pjdir)
+      project_dir + 'dict' + filename
+    end
+    # Invert a dictionary: the result maps every entry's :label to its code.
+    # When two entries share a label, the later one wins.
+    def self.reverse(dict)
+      dict.each_with_object({}) do |(code, entry), inverted|
+        inverted[entry[:label]] = code
+      end
+    end
+    # Score every key of @definitions against +str+ with
+    # LucaSupport.match_score and return the [value, score] pair with the
+    # highest score. Returns nil when @definitions is empty.
+    def max_score_code(str)
+      scored = @definitions.map do |pattern, code|
+        [code, LucaSupport.match_score(str, pattern, 3)]
+      end
+      scored.max { |a, b| a.last <=> b.last }
+    end
+  end
+end

data/lib/luca_record/io.rb ADDED

@@ -0,0 +1,337 @@
+require 'csv'
+require 'date'
+require 'fileutils'
+require 'yaml'
+require 'pathname'
+require 'luca_support/code'
+require 'luca_support/config'
+#
+# Low level API
+# manipulate files based on transaction date
+#
+module LucaRecord
+  module IO
+    include LucaSupport::Code
+    # Hook run when this module is included: the including class is also
+    # extended with ClassMethods, so find/search/all etc. become class methods.
+    def self.included(klass) # :nodoc:
+      klass.extend ClassMethods
+    end
+    #
+    # Yield every record whose current value matches the query.
+    # Each record from self.class.all is passed through parse_current
+    # before it is compared and yielded.
+    # (The original note says @date is used for searching current settings.)
+    #
+    # A plain value is compared with the 'val' item of the key. A nested
+    # hash names another item to compare instead; only its first pair is used.
+    #
+    #   where(contract_status: 'active')
+    #   where(graded: {rank: 5})
+    #
+    # Without a block an Enumerator is returned.
+    #
+    def where(**query)
+      return enum_for(:where, **query) unless block_given?
+      query.each do |key, val|
+        expected = val.respond_to?(:values) ? val.values.first : val
+        label = val.respond_to?(:keys) ? val.keys.first : 'val'
+        self.class.all do |data|
+          # skip records which do not have the queried key at all
+          next unless data.keys.map(&:to_sym).include?(key)
+          current = parse_current(data)
+          actual = current.dig(key.to_s, label.to_s)
+          yield current if expected == actual
+        end
+      end
+    end
+    module ClassMethods
+      #
+      # Find a record by ID and yield its loaded data.
+      #
+      # * 40 characters or more: treated as a hashed ID (open_hashed)
+      # * 9 to 39 characters: treated as an encoded date ID. The first
+      #   5 characters select the directory, the next 6 the file prefix
+      # * shorter IDs raise 'specified id length is too short'
+      #
+      # Without a block an Enumerator is returned.
+      #
+      def find(id, basedir = @dirname)
+        return enum_for(:find, id, basedir) unless block_given?
+        id_length = id.length
+        raise 'specified id length is too short' if id_length < 9
+        if id_length >= 40
+          open_hashed(basedir, id) { |f| yield load_data(f) }
+        else
+          # TODO: need regexp match for more flexible coding(after AD9999)
+          open_records(basedir, id[0, 5], id[5, 6]) { |f, path| yield load_data(f, path) }
+        end
+      end
+      #
+      # Search date based records. Same as search without a code filter.
+      # The block receives:
+      #
+      # * data hash
+      # * data id. Array like [2020H, V001]
+      #
+      # Without a block an Enumerator over search is returned.
+      #
+      def asof(year, month = nil, day = nil, basedir = @dirname)
+        return enum_for(:search, year, month, day, nil, basedir) unless block_given?
+        search(year, month, day, nil, basedir) { |data, path| yield data, path }
+      end
+      #
+      # Search records with date params & code.
+      # The directory is year + encoded month, the file prefix is the
+      # encoded day. The block receives the record and its id fragments.
+      # When @record_type is 'raw' the opened file itself is yielded
+      # instead of the loaded data.
+      #
+      # Without a block an Enumerator is returned.
+      #
+      def search(year, month = nil, day = nil, code = nil, basedir = @dirname)
+        return enum_for(:search, year, month, day, code, basedir) unless block_given?
+        subdir = year.to_s + LucaSupport::Code.encode_month(month)
+        day_prefix = LucaSupport::Code.encode_date(day)
+        open_records(basedir, subdir, day_prefix, code) do |f, path|
+          record = @record_type == 'raw' ? f : load_data(f, path)
+          yield record, path
+        end
+      end
+      #
+      # Retrieve all data under +basedir+, yielding each loaded record.
+      # Without a block an Enumerator is returned.
+      #
+      def all(basedir = @dirname)
+        return enum_for(:all, basedir) unless block_given?
+        open_all(basedir) { |f| yield load_data(f) }
+      end
+      #
+      # Convert an ID to a file path. Accepted arguments are:
+      #
+      # * [2020H, V001]   : joined with '/'
+      # * "2020H/V001"    : returned as is
+      # * "a7b806d04a044c6dbc4ce72932867719" : split by encode_hashed_path
+      #
+      # NOTE(review): the last form returns the [directory, filename] Array
+      # of encode_hashed_path, not a String - confirm callers expect that.
+      #
+      def id2path(id)
+        return id.join('/') if id.is_a?(Array)
+        return id if id.include?('/')
+        encode_hashed_path(id)
+      end
+      #
+      # Directory separation for performance. Same as Git way.
+      # Returns [directory, filename]: the first +split_factor+ characters
+      # of +id+ and the rest. An id no longer than +split_factor+ gets an
+      # empty directory part.
+      #
+      def encode_hashed_path(id, split_factor = 3)
+        return ['', id] if id.length <= split_factor
+        [id[0, split_factor], id[split_factor..-1]]
+      end
+      #
+      # Append { status => current timestamp } to the 'status' list of the
+      # record identified by +id+ and write the record back with sorted keys.
+      # The 'status' list is created when the record does not have one.
+      #
+      def add_status!(id, status, basedir = @dirname)
+        # id2path returns either a String or a [directory, filename] pair
+        path = abs_path(basedir).join(*Array(id2path(id)))
+        origin = YAML.load_file(path.to_s)
+        origin['status'] ||= []
+        origin['status'] << { status => DateTime.now.to_s }
+        File.write(path.to_s, YAML.dump(origin.sort.to_h))
+      end
+      #
+      # Create a hash based record.
+      # A random ID is issued, stored into obj['id'] (+obj+ is modified),
+      # and the object is written as YAML with sorted keys.
+      # Returns the issued ID.
+      #
+      def create(obj, basedir = @dirname)
+        LucaSupport::Code.issue_random_id.tap do |id|
+          obj['id'] = id
+          open_hashed(basedir, id, 'w') { |f| f.write(YAML.dump(obj.sort.to_h)) }
+        end
+      end
+      #
+      # Define a new transaction ID & write data at once.
+      # The block receives a CSV writer; the rows it adds are written to
+      # the new record file as tab separated lines without headers.
+      #
+      def create_record!(date_obj, codes = nil, basedir = @dirname)
+        gen_record_file!(basedir, date_obj, codes) do |f|
+          tsv = CSV.generate('', col_sep: "\t", headers: false) { |csv| yield(csv) }
+          f.write tsv
+        end
+      end
+      #
+      # Create a new record file for +date_obj+ and yield it opened for writing.
+      # The file name is the encoded date followed by a new record ID,
+      # then '-code' for each given code. The month directory is prepared
+      # by prepare_dir!.
+      #
+      def gen_record_file!(basedir, date_obj, codes = nil)
+        d = prepare_dir!(abs_path(basedir), date_obj)
+        filename = LucaSupport::Code.encode_date(date_obj) + new_record_id(abs_path(basedir), date_obj)
+        filename += codes.map { |code| "-#{code}" }.join if codes
+        File.open((Pathname(d) + filename).to_s, 'w') { |f| yield(f) }
+      end
+      # Encode the next record number of the day (new_record_no) with
+      # LucaSupport::Code.encode_txid.
+      def new_record_id(basedir, date_obj)
+        next_number = new_record_no(basedir, date_obj)
+        LucaSupport::Code.encode_txid(next_number)
+      end
+      # Make sure the directory for +date_obj+ (see encode_dirname) exists
+      # under +basedir+, creating it when missing. Returns its path as a String.
+      def prepare_dir!(basedir, date_obj)
+        target = Pathname(basedir) + encode_dirname(date_obj)
+        target.to_s.tap do |dir_name|
+          FileUtils.mkdir_p(dir_name) unless Dir.exist?(dir_name)
+        end
+      end
+      # Directory name for +date_obj+: the year followed by the month as
+      # encoded by LucaSupport::Code.encode_month.
+      def encode_dirname(date_obj)
+        year_part = date_obj.year.to_s
+        year_part + LucaSupport::Code.encode_month(date_obj)
+      end
+      private
+      #
+      # Open records with 'basedir/month/date-code' path structure.
+      # A glob pattern can be specified like the following examples.
+      #
+      #   '2020': All month of 2020
+      #   '2020[FG]': June & July of 2020
+      #
+      # Files are visited in sorted path order, and files rejected by
+      # skip_on_unmatch_code are skipped.
+      # The block receives the opened file and, as 2nd parameter, the code
+      # fragments of its path. The Array format is as follows:
+      # 1. encoded month
+      # 2. encoded day + record number of the day
+      # 3. codes. Everything after the first 2 elements is the code set.
+      #
+      # Without a block an Enumerator is returned.
+      #
+      def open_records(basedir, subdir, filename = nil, code = nil, mode = 'r')
+        return enum_for(:open_records, basedir, subdir, filename, code, mode) unless block_given?
+        file_pattern = if filename.nil?
+                         '*'
+                       else
+                         "#{filename}*"
+                       end
+        Dir.chdir(abs_path(basedir)) do
+          matches = Dir.glob("#{subdir}*/#{file_pattern}").sort
+          matches.each do |subpath|
+            next if skip_on_unmatch_code(subpath, code)
+            id_set = subpath.split('/').flat_map { |segment| segment.split('-') }
+            File.open(subpath, mode) { |f| yield(f, id_set) }
+          end
+        end
+      end
+      #
+      # Open a record stored in a git object like structure and yield the file.
+      # The location is decided by encode_hashed_path. The directory is
+      # created first unless the mode is 'r'.
+      #
+      # Without a block an Enumerator is returned.
+      #
+      def open_hashed(basedir, id, mode = 'r')
+        return enum_for(:open_hashed, basedir, id, mode) unless block_given?
+        subdir, filename = encode_hashed_path(id)
+        dirpath = Pathname(abs_path(basedir)) + subdir
+        FileUtils.mkdir_p(dirpath.to_s) unless mode == 'r'
+        target = (dirpath + filename).to_s
+        File.open(target, mode) { |f| yield f }
+      end
+      #
+      # Scan through all files located two levels below +basedir+
+      # ('basedir/*/*') and yield each of them opened with +mode+.
+      #
+      # Without a block an Enumerator is returned.
+      #
+      def open_all(basedir, mode = 'r')
+        return enum_for(:open_all, basedir, mode) unless block_given?
+        pattern = (Pathname(abs_path(basedir)) / '*' / '*').to_s
+        Dir.glob(pattern).each do |filename|
+          File.open(filename, mode) { |f| yield f }
+        end
+      end
+      #
+      # Decode basic format.
+      # If specific decode is needed, override this method in each class.
+      #
+      # * @record_type 'raw' : the io itself is returned
+      # * @record_type 'json': not implemented yet, nil is returned
+      # * otherwise          : the content is parsed as YAML
+      #
+      # +path+ is not used here.
+      #
+      def load_data(io, path = nil)
+        # TODO: raw may be unneeded in favor of override
+        return io if @record_type == 'raw'
+        # TODO: implement JSON parse
+        return nil if @record_type == 'json'
+        YAML.load(io.read)
+      end
+      # Path of +base_dir+ inside the 'data' directory of the project
+      # directory (LucaSupport::Config::Pjdir).
+      # TODO: replace with data_dir method
+      def abs_path(base_dir)
+        project_dir = Pathname(LucaSupport::Config::Pjdir)
+        project_dir + 'data' + base_dir
+      end
+      # true when the file doesn't have a record on +code+
+      # false when the file may have one:
+      # no code is given, the file name is 4 characters or shorter,
+      # or +code+ is one of the '-' separated parts after the first one.
+      def skip_on_unmatch_code(subpath, code = nil)
+        filename = subpath.split('/').last
+        return false if code.nil? || filename.length <= 4
+        codes_in_name = filename.split('-').drop(1)
+        !codes_in_name.include?(code)
+      end
+      # AUTO INCREMENT
+      # Next record number for the day of +date_obj+: 1 when the day has no
+      # file yet, otherwise the number decoded from characters 1..3 of the
+      # largest matching file name, plus 1.
+      # Raises 'No target dir exists.' when the month directory is missing.
+      def new_record_no(basedir, date_obj)
+        dir_name = (Pathname(basedir) + encode_dirname(date_obj)).to_s
+        raise 'No target dir exists.' unless Dir.exist?(dir_name)
+        last_file = Dir.chdir(dir_name) do
+          Dir.glob("#{LucaSupport::Code.encode_date(date_obj)}*").max
+        end
+        last_file.nil? ? 1 : LucaSupport::Code.decode_txid(last_file[1, 3]) + 1
+      end
+    end # end of ClassMethods
+    # Return the 'data/' directory path of the project at +dir_path+ as a String.
+    # Raises when +dir_path+ is nil or is rejected by valid_project?.
+    def set_data_dir(dir_path = LucaSupport::Config::Pjdir)
+      raise 'No project path is specified' if dir_path.nil?
+      raise 'Specified path is not for valid project' unless valid_project?(dir_path)
+      (Pathname(dir_path) + 'data/').to_s
+    end
+    # A project directory is valid when it has a 'config.yml' file
+    # and a 'data' directory.
+    def valid_project?(path)
+      project_dir = Pathname(path)
+      config_file = (project_dir + 'config.yml').to_s
+      data_dir = (project_dir + 'data').to_s
+      FileTest.file?(config_file) && FileTest.directory?(data_dir)
+    end
+    #
+    # For date based records.
+    # List the directories of +base_dir+ whose name starts with a digit
+    # (and with +query+ when given), sorted by name and converted by decode_term.
+    #
+    def scan_terms(base_dir, query = nil)
+      pattern = query.nil? ? "*" : "#{query}*"
+      Dir.chdir(base_dir) do
+        term_dirs = Dir.glob(pattern).select do |dir|
+          FileTest.directory?(dir) && /^[0-9]/.match(dir)
+        end
+        term_dirs.sort.map { |str| decode_term(str) }
+      end
+    end
+    # Load the YAML file at +path+.
+    # Returns an empty hash when +path+ is nil or the file does not exist.
+    def load_config(path = nil)
+      path = path.to_s
+      # File.exists? was removed in Ruby 3.2; File.exist? works on every version
+      return {} unless File.exist?(path)
+      YAML.load_file(path)
+    end
+    # true when any entry of dat['status'] has +status+ as a key.
+    # false when the record has no 'status' at all.
+    def has_status?(dat, status)
+      statuses = dat['status']
+      return false if statuses.nil?
+      statuses.any? { |entry| entry.key?(status) }
+    end
+    # Read the UTF-8 TSV file at +path+ with a header row and yield every row.
+    # Without a block an Enumerator is returned.
+    def load_tsv(path)
+      return enum_for(:load_tsv, path) unless block_given?
+      rows = CSV.read(path, headers: true, col_sep: "\t", encoding: 'UTF-8')
+      rows.each { |row| yield row }
+    end
+  end
+end