RubyGems - dreader - Versions diffs - 0.5.0 → 1.1.0 - Mend

dreader 0.5.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

checksums.yaml +4 -4
data/CHANGELOG.org +92 -0
data/Gemfile.lock +20 -7
data/README.org +821 -0
data/dreader.gemspec +6 -4
data/examples/age/age.rb +41 -25
data/examples/age_with_multiple_checks/Birthdays.ods +0 -0
data/examples/age_with_multiple_checks/age_with_multiple_checks.rb +64 -0
data/examples/local_vars/local_vars.rb +28 -0
data/examples/template/template_generation.rb +37 -0
data/examples/wikipedia_big_us_cities/big_us_cities.rb +24 -20
data/examples/wikipedia_us_cities/us_cities.rb +31 -28
data/examples/wikipedia_us_cities/us_cities_bulk_declare.rb +25 -23
data/lib/dreader/column.rb +39 -0
data/lib/dreader/engine.rb +495 -0
data/lib/dreader/options.rb +16 -0
data/lib/dreader/util.rb +86 -0
data/lib/dreader/version.rb +1 -1
data/lib/dreader.rb +5 -411
metadata +60 -24
data/Changelog.org +0 -20
data/README.md +0 -469

data/lib/dreader.rb CHANGED Viewed

@@ -1,412 +1,6 @@
-require 'dreader/version'
-require 'roo'
+require "dreader/column"
+require "dreader/engine"
+require "dreader/options"
+require "dreader/util"
+require "dreader/version"
-module Dreader
-  # service class to implement the column DSL language
-  class Column
-    def colref colref
-      @colref = colref
-    end
-    def process &block
-      @process = block
-    end
-    def check &block
-      @check = block
-    end
-    def to_hash
-      {process: @process, check: @check, colref: @colref }
-    end
-  end
-  # service class to implement the options DSL language
-  class Options
-    def initialize
-      @attributes = {}
-    end
-    def method_missing(name, *args, &block)
-      @attributes[name] = args[0]
-    end
-    def to_hash
-      @attributes
-    end
-  end
-  # Utility functions to simplify importing data into
-  # ActiveRecords
-  class Util
-    # given a hash returned by Engine, return the same hash with
-    # keys directly bound to the content of the :value sub-key
-    #
-    # Example
-    #
-    # hash = {name: {value: "A", ...}, surname: {value: "B", ...}}
-    # simplify hash
-    # {name: "A", surname: "B"}
-    def self.simplify hash
-      new_hash = {}
-      hash.keys.map { |k| new_hash[k] = hash[k][:value] }
-      new_hash
-    end
-    # given a hash returned by Engine, keep the "kept" keys in the top
-    # of the hierarchy and move the "moved_key" below the
-    # "subordinate_key"
-    #
-    # Example
-    #
-    # hash = {name: "A", surname: "B", address: "via XX Settembre", city: "Genoa"}
-    # restructure hash, [:name, :surname], :address_attributes, [:address, :city]
-    # {name: "A", surname: "B", address_attributes: {address: "via XX Settembre", city: "Genoa"}}
-    #
-    def self.restructure hash, kept, subordinate_key, moved_keys
-      head = hash.slice kept
-      subordinate = self.prepend subordinate_key, hash.slice(moved_keys)
-      head.merge subordinate
-    end
-    # an alias for Hash.slice
-    # keys is an array of keys
-    def self.slice hash, keys
-      hash.slice *keys
-    end
-    # remove all `keys` from `hash`
-    def self.clean hash, keys
-      hash.reject { |k, v| keys.include?(k) }
-    end
-    # given a hash and a key, return a new hash whose only key is
-    # `key` and whose value is the given hash
-    #
-    # Example:
-    #
-    #    hash = {name: "A", size: 10}
-    #    prepend hash, :product_attributes
-    #    {product_attributes: {name: "A", size: 10}}
-    #
-    def self.prepend hash, key
-      {key => hash}
-    end
-  end
-  #
-  # This is where the real stuff begins
-  #
-  class Engine
-    # readable for debugging purposes
-    # the options we passed
-    attr_reader :options
-    # the specification of the columns to process
-    attr_reader :colspec
-    # the specification of the virtual columns
-    attr_reader :virtualcols
-    # the data we read
-    attr_reader :table
-    def initialize
-      @options = {}
-      @colspec = []
-      @virtualcols = []
-    end
-    # define a DSL for options
-    # any string is processed as an option and it ends up in the
-    # @options hash
-    def options &block
-      options = Options.new
-      options.instance_eval(&block)
-      @options = options.to_hash
-    end
-    # define a DSL for column specification
-    # - `name` is the name of the column
-    # - `block` contains two declarations, `process` and `check`, which are
-    #   used, respectively, to make a cell into the desired data and to check
-    #   whether the desired data is ok
-    def column name, &block
-      column = Column.new
-      column.instance_eval(&block)
-      @colspec << column.to_hash.merge({name: name})
-    end
-    # bulk declare columns we intend to read
-    #
-    # - hash is a hash in the form { symbolic_name: colref }
-    #
-    # i.bulk_declare {name: 'B', age: 'C'} is equivalent to:
-    #
-    # i.column :name do
-    #   colref 'B'
-    # end
-    # i.column :age do
-    #   colref 'C'
-    # end
-    #
-    # i.bulk_declare {name: 'B', age: 'C'} do
-    #   process do |cell|
-    #     cell.strip
-    #   end
-    # end
-    #
-    # is equivalent to:
-    #
-    # i.column :name do
-    #   colref 'B'
-    #   process do |cell|
-    #     cell.strip
-    #   end
-    # end
-    # i.column :age do
-    #   colref 'C'
-    #   process do |cell|
-    #     cell.strip
-    #   end
-    # end
-    def bulk_declare hash, &block
-      hash.keys.each do |key|
-        column = Column.new
-        column.colref hash[key]
-        if block
-          column.instance_eval(&block)
-        end
-        @colspec << column.to_hash.merge({name: key})
-      end
-    end
-    # virtual columns define derived attributes
-    # the code specified in the virtual column is executed after reading
-    # a row and before applying the mapping function
-    #
-    # virtual column declarations are executed in the order in which
-    # they are defined
-    def virtual_column name, &block
-      column = Column.new
-      column.instance_eval &block
-      @virtualcols << column.to_hash.merge({name: name})
-    end
-    # define what we do with each line we read
-    # - `block` is the code which takes a `row` as input and processes it.
-    #   `row` is a hash in which each spreadsheet cell is accessible under
-    #   the column names. Each cell has the following values:
-    #   :value, :error, :row_number, :col_number
-    def mapping &block
-      @mapping = block
-    end
-    # read a file and store it internally
-    #
-    # @param hash, a hash, possibly overriding any of the parameters
-    #              set in the initial options.  This allows you, for
-    #              instance, to apply the same column specification to
-    #              different files and different sheets
-    #
-    # @return the data read from filename, in the form of an array of
-    #         hashes
-    def read args = {}
-      if args.class == Hash
-        hash = @options.merge(args)
-      else
-        puts "dreader error at #{__callee__}: this function takes a Hash as input"
-        exit
-      end
-      spreadsheet = Dreader::Engine.open_spreadsheet (hash[:filename])
-      sheet = spreadsheet.sheet(hash[:sheet] || 0)
-      @table = Array.new
-      @errors = Array.new
-      first_row = hash[:first_row] || 1
-      last_row = hash[:last_row] || sheet.last_row
-      (first_row..last_row).each do |row_number|
-        r = Hash.new
-        @colspec.each_with_index do |colspec, index|
-          cell = sheet.cell(row_number, colspec[:colref])
-          colname = colspec[:name]
-          r[colname] = Hash.new
-          r[colname][:row_number] = row_number
-          r[colname][:col_number] = colspec[:colref]
-          begin
-            r[colname][:value] = value = colspec[:process] ? colspec[:process].call(cell) : cell
-          rescue => e
-            puts "dreader error at #{__callee__}: 'process' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
-            raise e
-          end
-          begin
-            if colspec[:check] and not colspec[:check].call(value) then
-              r[colname][:error] = true
-              @errors << "dreader error at #{__callee__}: value \"#{cell}\" for #{colname} at row #{row_number} (col #{index + 1}) does not pass the check function"
-            else
-              r[colname][:error] = false
-            end
-          rescue => e
-            puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
-            raise e
-          end
-        end
-        @table << r
-      end
-      @table
-    end
-    alias_method :load, :read
-    # get (processed) row number
-    #
-    # - row_number is the row to get: index starts at 1.
-    #
-    # get_row(1) gets the first line read, that is, the row specified
-    # by `first_row` in `options` (or in read)
-    #
-    # You need to invoke read first
-    def get_row row_number
-      if row_number > @table.size
-        puts "dreader error at #{__callee__}: 'row_number' is out of range (did you invoke read first?)"
-        exit
-      elsif row_number <= 0
-        puts "dreader error at #{__callee__}: 'row_number' is zero or negative (first row is 1)."
-      else
-        @table[row_number - 1]
-      end
-    end
-    # show to stdout the first `n` records we read from the file given the current
-    # configuration
-    def debug args = {}
-      if args.class == Hash
-        hash = @options.merge(args)
-      else
-        puts "dreader error at #{__callee__}: this function takes a Hash as input"
-        exit
-      end
-      # apply some defaults, if not defined in the options
-      hash[:process] = true if not hash.has_key? :process # shall we apply the process function?
-      hash[:check] = true if not hash.has_key? :check     # shall we check the data read?
-      hash[:n] = 10 if not hash[:n]
-      spreadsheet = Dreader::Engine.open_spreadsheet (hash[:filename])
-      sheet = spreadsheet.sheet(hash[:sheet] || 0)
-      puts "Current configuration:"
-      @options.each do |k, v|
-        puts "  #{k}: #{v}"
-      end
-      puts "Configuration used by debug:"
-      hash.each do |k, v|
-        puts "  #{k}: #{v}"
-      end
-      n = hash[:n]
-      first_row = hash[:first_row] || 1
-      last_row = first_row + n - 1
-      puts "  Last row (according to roo): #{sheet.last_row}"
-      puts "  Number of rows I will read in this session: #{n} (from #{first_row} to #{last_row})"
-      (first_row..last_row).each do |row_number|
-        puts "Row #{row_number} is:"
-        r = Hash.new
-        @colspec.each_with_index do |colspec, index|
-          colname = colspec[:name]
-          cell = sheet.cell(row_number, colspec[:colref])
-          processed_str = ""
-          checked_str = ""
-          if hash[:process]
-            begin
-              processed = colspec[:process] ? colspec[:process].call(cell) : cell
-              processed_str = "processed: '#{processed}' (#{processed.class})"
-            rescue => e
-              puts "dreader error at #{__callee__}: 'check' specification for :#{colname} raised an exception at row #{row_number} (col #{index + 1}, value: #{cell})"
-              raise e
-            end
-          end
-          if hash[:check]
-            begin
-              processed = colspec[:process] ? colspec[:process].call(cell) : cell
-              check = colspec[:check] ? colspec[:check].call(processed) : "no check specified"
-              checked_str = "checked: '#{check}'"
-            rescue => e
-              puts "dreader error at #{__callee__}: 'check' specification for #{colname} at row #{row_number} raised an exception (col #{index + 1}, value: #{cell})"
-              raise e
-            end
-          end
-          puts "  #{colname} => orig: '#{cell}' (#{cell.class}) #{processed_str} #{checked_str} (column: '#{colspec[:colref]}')"
-        end
-      end
-    end
-    # return an array of strings with all the errors we have encountered
-    # an empty array is good news
-    def errors
-      @errors
-    end
-    def virtual_columns
-      # execute the virtual column specification
-      @table.each do |r|
-        @virtualcols.each do |virtualcol|
-          begin
-            # add the cell to the table
-            r[virtualcol[:name]] = {
-              value: virtualcol[:process].call(r),
-              virtual: true,
-            }
-          rescue => e
-            puts "dreader error at #{__callee__}: 'process' specification for :#{virtualcol[:name]} raised an exception at row #{r[r.keys.first][:row_number]}"
-            raise e
-          end
-        end
-      end
-    end
-    # apply the mapping code to the array
-    # it makes sense to invoke it only once
-    #
-    # the mapping is applied only if it is defined
-    def process
-      @table.each do |r|
-        @mapping.call(r) if @mapping
-      end
-    end
-    def to_s
-      @table.to_s
-    end
-    private
-    def self.open_spreadsheet(filename)
-      case File.extname(filename)
-      when ".csv" then Roo::CSV.new(filename)
-      when ".tsv" then Roo::CSV.new(filename, csv_options: {col_sep: "\t"})
-      when ".ods" then Roo::OpenOffice.new(filename)
-      when ".xls" then Roo::Excel.new(filename)
-      when ".xlsx" then Roo::Excelx.new(filename)
-      else raise "Unknown extension: #{File.extname(filename)}"
-      end
-    end
-  end
-end

metadata CHANGED Viewed

@@ -1,57 +1,85 @@
 --- !ruby/object:Gem::Specification
 name: dreader
 version: !ruby/object:Gem::Version
-  version: 0.5.0
+  version: 1.1.0
 platform: ruby
 authors:
 - Adolfo Villafiorita
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-11-30 00:00:00.000000000 Z
+date: 2023-10-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: bundler
+  name: roo
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.16'
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: fast_excel
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: debug
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.0.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.16'
+        version: 1.0.0
 - !ruby/object:Gem::Dependency
-  name: rake
+  name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '1.16'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: '1.16'
 - !ruby/object:Gem::Dependency
-  name: roo
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
+        version: '10.0'
+  type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '10.0'
 description: |-
   Use this gem to specify the structure of some tabular data
   you want to process.  The input data can be in CSV, LibreOffice, and Excel.  Each row
@@ -63,35 +91,43 @@ description: |-
   The gem should be relatively easy to use, despite its name. (Dread
   stands for *d*ata *r*eader)
 email:
-- adolfo.villafiorita@ict4g.net
+- adolfo@shair.tech
 executables: []
 extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
-- Changelog.org
+- CHANGELOG.org
 - Gemfile
 - Gemfile.lock
 - LICENSE.txt
-- README.md
+- README.org
 - Rakefile
 - bin/console
 - bin/setup
 - dreader.gemspec
 - examples/age/Birthdays.ods
 - examples/age/age.rb
+- examples/age_with_multiple_checks/Birthdays.ods
+- examples/age_with_multiple_checks/age_with_multiple_checks.rb
+- examples/local_vars/local_vars.rb
+- examples/template/template_generation.rb
 - examples/wikipedia_big_us_cities/big_us_cities.rb
 - examples/wikipedia_big_us_cities/cities_by_state.ods
 - examples/wikipedia_us_cities/us_cities.rb
 - examples/wikipedia_us_cities/us_cities.tsv
 - examples/wikipedia_us_cities/us_cities_bulk_declare.rb
 - lib/dreader.rb
+- lib/dreader/column.rb
+- lib/dreader/engine.rb
+- lib/dreader/options.rb
+- lib/dreader/util.rb
 - lib/dreader/version.rb
-homepage: https://ict4g.net/gitea/adolfo/dreader
+homepage: https://redmine.shair.tech/projects/dreader
 licenses:
 - MIT
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -106,8 +142,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.3
-signing_key:
+rubygems_version: 3.3.26
+signing_key:
 specification_version: 4
 summary: Process and import data from cvs and spreadsheets
 test_files: []

data/Changelog.org DELETED Viewed

@@ -1,20 +0,0 @@
-* Version 0.4.2
-** better error messages for process and check functions
-   dreader now captures exceptions raised by process and check and
-   prints an error message to stdout if an error is found.
-   the exception is then propagated in the standard way.
-** new method bulk_declare
-   bulk_declare allows you to easily declare columns which don't need
-   any specific treatment
-** read will now complain if the argument passed is not a hash
-** virtualcols is now accessible (attr_reader)
-** fixed a bug with slice
-* Version 0.4.1
-** fixed an issue with ~read~: it always required a hash as input
-** changed syntax of ~debug~, which now accepts a hash as argument
-   This makes its syntax similar to ~read~.
-** improved output of ~debug~
-   By default ~debug~ now prints the output of ~process~ and ~check~.
-   You can disable this feature by passing ~process: false~ and/or ~check:
-   false~ to the ~debug~.  Notice that ~check~ implies ~process~.