RubyGems - structured_csv - Versions diffs - 0.1.0 → 0.1.1 - Mend

structured_csv 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

checksums.yaml +4 -4
data/.editorconfig +18 -0
data/.gitattributes +1 -0
data/.github/workflows/main.yml +6 -2
data/.gitignore +256 -6
data/.hound.yml +3 -0
data/.rubocop.yml +40 -3
data/Gemfile +7 -5
data/README.adoc +29 -3
data/exe/csv_join +27 -0
data/exe/csv_join.rb +1 -0
data/exe/structured_csv_to_yaml +30 -0
data/exe/structured_csv_to_yaml.rb +1 -0
data/lib/structured_csv.rb +3 -5
data/lib/structured_csv/common.rb +12 -0
data/lib/structured_csv/csv2yaml.rb +239 -0
data/lib/structured_csv/csv_join.rb +78 -0
data/lib/structured_csv/version.rb +1 -1
data/structured_csv.gemspec +16 -5
metadata +127 -5
data/exe/csv_join.rb +0 -95
data/exe/structured_csv_to_yaml.rb +0 -254

metadata CHANGED Viewed

@@ -1,26 +1,143 @@
 --- !ruby/object:Gem::Specification
 name: structured_csv
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.1
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-05-14 00:00:00.000000000 Z
-dependencies: []
+date: 2021-05-17 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: csv
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.1'
+- !ruby/object:Gem::Dependency
+  name: yaml
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.1'
+- !ruby/object:Gem::Dependency
+  name: byebug
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '11.1'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '11.1'
+- !ruby/object:Gem::Dependency
+  name: guard
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.17'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.17'
+- !ruby/object:Gem::Dependency
+  name: guard-rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.7'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '4.7'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '13.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '13.0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.10'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '3.10'
+- !ruby/object:Gem::Dependency
+  name: simplecov
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.21'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.21'
 description: Library to process structured CSV files
 email:
 - open.source@ribose.com
 executables:
+- csv_join
 - csv_join.rb
+- structured_csv_to_yaml
 - structured_csv_to_yaml.rb
 extensions: []
 extra_rdoc_files: []
 files:
+- ".editorconfig"
+- ".gitattributes"
 - ".github/workflows/main.yml"
 - ".gitignore"
+- ".hound.yml"
 - ".rspec"
 - ".rubocop.yml"
 - CODE_OF_CONDUCT.md
@@ -29,9 +146,14 @@ files:
 - Rakefile
 - bin/console
 - bin/setup
+- exe/csv_join
 - exe/csv_join.rb
+- exe/structured_csv_to_yaml
 - exe/structured_csv_to_yaml.rb
 - lib/structured_csv.rb
+- lib/structured_csv/common.rb
+- lib/structured_csv/csv2yaml.rb
+- lib/structured_csv/csv_join.rb
 - lib/structured_csv/version.rb
 - samples/T-SP-E.212B-2018.et.csv
 - samples/T-SP-E.212B-2018.et.yaml
@@ -53,14 +175,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.4.0
+      version: 2.6.7
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.3
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: Library to process structured CSV files

data/exe/csv_join.rb DELETED Viewed

@@ -1,95 +0,0 @@
-#!/usr/bin/env ruby
-require 'csv'
-require 'yaml'
-require 'pathname'
-module CsvJoin
-  def self.join(csv, section_name)
-    first_row = nil
-    last_row = -1
-    puts "section_name #{section_name}"
-    csv.each_with_index do |row, index|
-      if first_row.nil? && is_start_of_portion?(row, section_name)
-        puts "found first"
-        first_row = index+1
-        next
-      end
-      if !first_row.nil? && is_row_empty?(row)
-        puts "found last"
-        last_row = index
-        break
-      end
-    end
-    puts "first #{first_row}  last #{last_row}"
-    csv[first_row..last_row]
-  end
-  def self.load_csv(csvfile)
-    # puts csvfile
-    content = File.read(csvfile, encoding: "bom|utf-8").scrub
-    CSV.parse(content, liberal_parsing: true, encoding: "UTF-8")
-  end
-  def self.convert(csvdir, outfile)
-    raise "first argument must be a directory!" unless File.directory?(csvdir)
-    csv = CSV.open(outfile, "wb", encoding: "UTF-8")
-    csvfiles = Dir.glob(File.join(csvdir, "**", "*.csv")).sort
-    raise "directory must contain .csv files!" if csvfiles.empty?
-    # Assume all files use the same header structure as the first CSV file
-    header = []
-    csvheader = ""
-    csvfiles.each do |csvfile|
-      content = load_csv(csvfile)
-      csvheader = content.shift
-      if header.empty?
-        header = ['name'] + csvheader
-        csv << header
-      end
-      basename = Pathname.new(csvfile).basename.sub_ext('').to_s
-      content.each do |filerow|
-        row = []
-        filerow.each do |value|
-          row << case value
-                 when String
-                   value.strip
-                 else
-                   value
-                 end
-        end
-        all_empty = row.all? do |f|
-          f.nil? || f.empty?
-        end
-        next if all_empty
-        row.unshift(basename)
-        csv << row
-      end
-    end
-    csv
-  end
-end
-csvdir = ARGV.pop
-outfile = Pathname.new(csvdir).sub_ext(".csv").to_s
-# puts outfile
-CsvJoin.convert(csvdir, outfile)
-# puts CsvJoin.convert(csvdir)

data/exe/structured_csv_to_yaml.rb DELETED Viewed

@@ -1,254 +0,0 @@
-#!/usr/bin/env ruby
-require 'csv'
-require 'yaml'
-module CsvToStructuredHash
-  def self.get_portion(csv, section_name)
-    first_row = nil
-    last_row = -1
-    data_meta = {}
-    puts "section_name #{section_name}"
-    csv.each_with_index do |row, index|
-      if first_row.nil? && is_start_of_portion?(row, section_name)
-        # puts"found first"
-        row[1].split(';').each do |opt|
-          k, v = opt.split('=')
-          data_meta[k.to_sym] = v
-        end if row[1] && !row[1].empty?
-        first_row = index+1
-        next
-      end
-      if !first_row.nil? && is_row_empty?(row)
-        # puts "found last"
-        last_row = index
-        break
-      end
-    end
-    # puts "first #{first_row}  last #{last_row}"
-    {
-      first_row: first_row,
-      last_row: last_row,
-      rows: csv[first_row..last_row],
-      meta: data_meta
-    }
-  end
-  def self.is_start_of_portion?(row, section_name)
-    return false if row.first.nil?
-    row.first.strip.to_s == section_name.to_s
-  end
-  def self.is_row_empty?(row)
-    row.map do |f|
-      f.is_a?(String) ? f.strip : f
-    end.all?(&:nil?)
-  end
-  def self.get_csv(csv_filename)
-    content = File.read(csv_filename, encoding: "bom|utf-8")
-    CSV.parse(content, liberal_parsing: true, encoding: "UTF-8")
-  end
-  def self.split_header_key_type(header_field)
-    field_name = ""
-    field_type = CAST_DEFAULT_TYPE
-    # puts header_field
-    arr = header_field.match(/\A(.*)\[(.*)\]\Z/)
-    if arr.nil?
-      field_name = header_field
-    else
-      field_name = arr[1]
-      field_type = arr[2]
-    end
-    {
-      name: field_name,
-      type: field_type
-    }
-  end
-  CAST_DEFAULT_TYPE = "string".freeze
-  def self.cast_type(value, type_in_string)
-    return if value.nil?
-    type = type_in_string.downcase
-    case type
-    when "boolean"
-      if value.downcase == "true"
-        true
-      elsif value.downcase == "false"
-        false
-      end
-    when "integer"
-      value.to_s.strip.to_i
-    when "string"
-      value.to_s.strip
-    when /^array\{(.*)\}/
-      val_type = Regexp.last_match[1] || CAST_DEFAULT_TYPE
-      value.split(";").map do |v|
-        # puts "cast type as #{v}, #{val_type.to_s}"
-        cast_type(v, val_type.to_s)
-      end
-    else
-      value.to_s
-    end
-  end
-  def self.parse_metadata(rows)
-    hash = {}
-    rows.each_with_index do |row,index|
-      # Skip all the empty rows
-      next if is_row_empty?(row)
-      name_type = split_header_key_type(row.first)
-      key = name_type[:name]
-      type = name_type[:type]
-      value = cast_type(row[1], type)
-      hash[key] = value
-    end
-    # puts "=============================METADATA================="
-    # pp hash
-    normalize_namespaces(hash)
-  end
-  def self.parse_data(rows, data_meta)
-    header = []
-    data_name = data_meta[:name]
-    data_type = data_meta[:type] || "hash"
-    data_key = data_meta[:key]
-    base_structure = case data_type
-    when "hash"
-      {}
-    when "array"
-      []
-    end
-    rows.each_with_index do |row,index|
-      # Assume the first column is always the key
-      if index == 0
-        # puts "row #{row}"
-        header = row.map do |field|
-          split_header_key_type(field) unless field.nil?
-        end.compact
-        if data_type == "hash" && data_key.nil?
-          data_key = header.first
-        end
-        next
-      end
-      # puts "header #{header.inspect}"
-      # Skip all the empty rows
-      next if is_row_empty?(row)
-      # Skip if no key value
-      next if row[0].nil?
-      header_names = header.inject([]) do |acc,v|
-        acc << v[:name]
-      end
-      row_values = []
-      header.each_with_index do |h, i|
-        v = row[i]
-        v = v.strip unless v.nil?
-        row_values[i] = cast_type(v, h[:type])
-      end
-      k = row_values[0]
-      d = Hash[header_names[0..-1].zip(row_values[0..-1])]
-      #  .transform_keys { |k| k.to_sym }
-      # Remove keys if they point to nil
-      d.keys.each do |k|
-        d.delete(k) if d[k].nil?
-      end
-      case data_type
-      when "hash"
-        unless base_structure[k].nil?
-          puts "[WARNING] there is already data inside key [#{k}] -- maybe you should set type=array?"
-        end
-        base_structure[k] = normalize_namespaces(d)
-      when "array"
-        base_structure << normalize_namespaces(d)
-      end
-    end
-    if data_name
-      base_structure = {
-        data_name => base_structure
-      }
-    end
-    base_structure
-  end
-  def self.convert(csv_filename)
-    raw_data = get_csv(csv_filename)
-    metadata_section = get_portion(raw_data, "METADATA")
-    data_section = get_portion(raw_data, "DATA")
-    # puts '----------'
-    # pp data_section[:rows]
-    # puts '----------'
-    {
-      "metadata" => parse_metadata(metadata_section[:rows]),
-      "data" => parse_data(data_section[:rows], data_section[:meta])
-    }
-  end
-  # Structure all child hashes if the key is namespaced.
-  # e.g. { "hello.me" => data } becomes
-  #  { "hello" => { "me" => data } }
-  #
-  def self.normalize_namespaces(hash)
-    new_hash = {}
-    hash.each_pair do |k, v|
-      # puts"k (#{k}) v (#{v})"
-      key_components = k.to_s.split('.')
-      level = new_hash
-      last_component = key_components.pop
-      key_components.each do |component|
-        # puts"c (#{component})"
-        level[component] ||= {}
-        level = level[component]
-      end
-      level[last_component] = v
-    end
-    new_hash
-  end
-end
-csvfile = ARGV.pop
-raise "first argument must be a .csv file!" unless csvfile =~ /\.csv$/
-outfile = csvfile.gsub(/csv$/, "yaml")
-IO.write(
-  outfile,
-  CsvToStructuredHash.convert(csvfile).to_yaml
-)
-# pp CsvToStructuredHash.convert(filename)