RubyGems - structured_csv - Versions diffs - 0.1.0 → 0.1.1 - Mend

structured_csv 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

checksums.yaml +4 -4
data/.editorconfig +18 -0
data/.gitattributes +1 -0
data/.github/workflows/main.yml +6 -2
data/.gitignore +256 -6
data/.hound.yml +3 -0
data/.rubocop.yml +40 -3
data/Gemfile +7 -5
data/README.adoc +29 -3
data/exe/csv_join +27 -0
data/exe/csv_join.rb +1 -0
data/exe/structured_csv_to_yaml +30 -0
data/exe/structured_csv_to_yaml.rb +1 -0
data/lib/structured_csv.rb +3 -5
data/lib/structured_csv/common.rb +12 -0
data/lib/structured_csv/csv2yaml.rb +239 -0
data/lib/structured_csv/csv_join.rb +78 -0
data/lib/structured_csv/version.rb +1 -1
data/structured_csv.gemspec +16 -5
metadata +127 -5
data/exe/csv_join.rb +0 -95
data/exe/structured_csv_to_yaml.rb +0 -254

data/exe/csv_join ADDED Viewed

@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+# resolve bin path, ignoring symlinks
+require "pathname"
+bin_file = Pathname.new(__FILE__).realpath
+# add self to libpath
+$:.unshift File.expand_path("../../lib", bin_file)
+# Fixes https://github.com/rubygems/rubygems/issues/1420
+require "rubygems/specification"
+class Gem::Specification
+  def this
+    self
+  end
+end
+require "structured_csv/csv_join"
+csvdir  = ARGV.pop
+outfile = Pathname.new(csvdir).sub_ext(".csv").to_s
+# puts outfile
+StructuredCsv::CsvJoin.convert(csvdir, outfile)
+# puts StructuredCsv::CsvJoin.convert(csvdir)

data/exe/csv_join.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ exe/csv_join

data/exe/structured_csv_to_yaml ADDED Viewed

@@ -0,0 +1,30 @@
+#!/usr/bin/env ruby
+# resolve bin path, ignoring symlinks
+require "pathname"
+bin_file = Pathname.new(__FILE__).realpath
+# add self to libpath
+$:.unshift File.expand_path("../../lib", bin_file)
+# Fixes https://github.com/rubygems/rubygems/issues/1420
+require "rubygems/specification"
+class Gem::Specification
+  def this
+    self
+  end
+end
+require "structured_csv/csv2yaml"
+csvfile = ARGV.pop
+raise "first argument must be a .csv file!" unless /\.csv$/.match?(csvfile)
+outfile = csvfile.gsub(/csv$/, "yaml")
+IO.write(
+  outfile,
+  StructuredCsv::CsvTo2Yaml.convert(csvfile).to_yaml,
+)
+# pp Csv2Yaml.convert(filename)

data/exe/structured_csv_to_yaml.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ exe/structured_csv_to_yaml

data/lib/structured_csv.rb CHANGED Viewed

@@ -1,8 +1,6 @@
 # frozen_string_literal: true
 require_relative "structured_csv/version"
-module StructuredCsv
-  class Error < StandardError; end
-  # Your code goes here...
-end
+require_relative "structured_csv/common"
+require_relative "structured_csv/csv2yaml"
+require_relative "structured_csv/csv_join"

data/lib/structured_csv/common.rb ADDED Viewed

@@ -0,0 +1,12 @@
+require "csv"
+module StructuredCsv
+  module Common
+    def self.load_csv(csvfile)
+      # warn csvfile
+      content = File.read(csvfile, encoding: "bom|utf-8").scrub
+      CSV.parse(content, liberal_parsing: true, encoding: "UTF-8")
+    end
+  end
+end

data/lib/structured_csv/csv2yaml.rb ADDED Viewed

@@ -0,0 +1,239 @@
+require "csv"
+require "yaml"
+module StructuredCsv
+  module Csv2Yaml
+    def self.get_portion(csv, section_name)
+      first_row = nil
+      last_row  = -1
+      data_meta = {}
+      warn "section_name #{section_name}"
+      csv.each_with_index do |row, index|
+        if first_row.nil? && is_start_of_portion?(row, section_name)
+          # warn"found first"
+          if row[1] && !row[1].empty?
+            row[1].split(";").each do |opt|
+              k, v                = opt.split("=")
+              data_meta[k.to_sym] = v
+            end
+          end
+          first_row = index + 1
+          next
+        end
+        next unless !first_row.nil? && is_row_empty?(row)
+        # warn "found last"
+        last_row = index
+        break
+      end
+      # warn "first #{first_row}  last #{last_row}"
+      {
+        first_row: first_row,
+        last_row:  last_row,
+        rows:      csv[(first_row.nil? ? 0 : first_row)..last_row],
+        meta:      data_meta
+      }
+    end
+    def self.is_start_of_portion?(row, section_name)
+      return false if row.first.nil?
+      row.first.strip.to_s == section_name.to_s
+    end
+    def self.is_row_empty?(row)
+      row.map do |f|
+        f.is_a?(String) ? f.strip : f
+      end.all?(&:nil?)
+    end
+    def self.split_header_key_type(header_field)
+      field_name = ""
+      field_type = CAST_DEFAULT_TYPE
+      # warn header_field
+      arr = header_field.match(/\A([^\[]*)\[(.*)\]\Z/)
+      if arr.nil?
+        field_name = header_field
+      else
+        field_name = arr[1]
+        field_type = arr[2]
+      end
+      {
+        name: field_name,
+        type: field_type
+      }
+    end
+    CAST_DEFAULT_TYPE = "string".freeze
+    def self.cast_type(value, type_in_string)
+      return if value.nil?
+      type = type_in_string.downcase
+      case type
+      when "boolean"
+        if value == "true"
+          true
+        elsif value == "false"
+          false
+        end
+      when "integer"
+        value.to_s.strip.to_i
+      when "string"
+        value.to_s.strip
+      when /^array\{(.*)\}/
+        val_type = Regexp.last_match[1] || CAST_DEFAULT_TYPE
+        value.split(";").map do |v|
+          # warn "cast type as #{v}, #{val_type.to_s}"
+          cast_type(v, val_type.to_s)
+        end
+      else
+        value.to_s
+      end
+    end
+    def self.parse_metadata(rows)
+      hash = {}
+      rows.each_with_index do |row, _index|
+        # Skip all the empty rows
+        next if is_row_empty?(row)
+        name_type = split_header_key_type(row.first)
+        key       = name_type[:name]
+        type      = name_type[:type]
+        value     = cast_type(row[1], type)
+        hash[key] = value
+      end
+      # warn "=============================METADATA================="
+      # pp hash
+      normalize_namespaces(hash)
+    end
+    def self.parse_data(rows, data_meta)
+      header    = []
+      data_name = data_meta[:name]
+      data_type = data_meta[:type] || "hash"
+      data_key  = data_meta[:key]
+      base_structure = case data_type
+                       when "hash"
+                         {}
+                       when "array"
+                         []
+                       end
+      rows.each_with_index do |row, index|
+        # Assume the first column is always the key
+        if index == 0
+          # warn "row #{row}"
+          header = row.map do |field|
+            split_header_key_type(field) unless field.nil?
+          end.compact
+          data_key = header.first if data_type == "hash" && data_key.nil?
+          next
+        end
+        # warn "header #{header.inspect}"
+        # Skip all the empty rows
+        next if is_row_empty?(row)
+        # Skip if no key value
+        next if row[0].nil?
+        header_names = header.inject([]) do |acc, v|
+          acc << v[:name]
+        end
+        row_values = []
+        header.each_with_index do |h, i|
+          v             = row[i]
+          v             = v.strip unless v.nil?
+          row_values[i] = cast_type(v, h[:type])
+        end
+        k = row_values[0]
+        d = Hash[header_names[0..-1].zip(row_values[0..-1])]
+        #  .transform_keys { |k| k.to_sym }
+        # Remove keys if they point to nil
+        d.keys.each do |k|
+          d.delete(k) if d[k].nil?
+        end
+        case data_type
+        when "hash"
+          unless base_structure[k].nil?
+            warn "[WARNING] there is already data inside key [#{k}] -- maybe you should set type=array?"
+          end
+          base_structure[k] = normalize_namespaces(d)
+        when "array"
+          base_structure << normalize_namespaces(d)
+        end
+      end
+      if data_name
+        base_structure = {
+          data_name => base_structure
+        }
+      end
+      base_structure
+    end
+    def self.convert(csv_filename)
+      raw_data = StructuredCsv::Common.load_csv(csv_filename)
+      metadata_section = get_portion(raw_data, "METADATA")
+      data_section     = get_portion(raw_data, "DATA")
+      # warn '----------'
+      # pp data_section[:rows]
+      # warn '----------'
+      {
+        "metadata" => parse_metadata(metadata_section[:rows]),
+        "data"     => parse_data(data_section[:rows], data_section[:meta])
+      }
+    end
+    # Structure all child hashes if the key is namespaced.
+    # e.g. { "hello.me" => data } becomes
+    #  { "hello" => { "me" => data } }
+    #
+    def self.normalize_namespaces(hash)
+      new_hash = {}
+      hash.each_pair do |k, v|
+        # warn"k (#{k}) v (#{v})"
+        key_components = k.to_s.split(".")
+        level          = new_hash
+        last_component = key_components.pop
+        key_components.each do |component|
+          # warn"c (#{component})"
+          level[component] ||= {}
+          level              = level[component]
+        end
+        level[last_component] = v
+      end
+      new_hash
+    end
+  end
+end

data/lib/structured_csv/csv_join.rb ADDED Viewed

@@ -0,0 +1,78 @@
+require "csv"
+require "yaml"
+require "pathname"
+module StructuredCsv
+  module CsvJoin
+    def self.join(csv, section_name)
+      first_row = nil
+      last_row  = -1
+      warn "section_name #{section_name}"
+      csv.each_with_index do |row, index|
+        if first_row.nil? && Csv2Yaml.is_start_of_portion?(row, section_name)
+          warn "found first"
+          first_row = index + 1
+          next
+        end
+        next unless !first_row.nil? && Csv2Yaml.is_row_empty?(row)
+        warn "found last"
+        last_row = index
+        break
+      end
+      warn "first #{first_row}  last #{last_row}"
+      csv[first_row..last_row]
+    end
+    def self.convert(csvdir, outfile)
+      raise "first argument must be a directory!" unless File.directory?(csvdir)
+      csv = CSV.open(outfile, "wb", encoding: "UTF-8")
+      csvfiles = Dir.glob(File.join(csvdir, "**", "*.csv")).sort
+      raise "directory must contain .csv files!" if csvfiles.empty?
+      # Assume all files use the same header structure as the first CSV file
+      header    = []
+      csvheader = ""
+      csvfiles.each do |csvfile|
+        content = StructuredCsv::Common.load_csv(csvfile)
+        csvheader = content.shift
+        if header.empty?
+          header = ["name"] + csvheader
+          csv << header
+        end
+        basename = Pathname.new(csvfile).basename.sub_ext("").to_s
+        content.each do |filerow|
+          row = []
+          filerow.each do |value|
+            row << case value
+                   when String
+                     value.strip
+                   else
+                     value
+                   end
+          end
+          all_empty = row.all? do |f|
+            f.nil? || f.empty?
+          end
+          next if all_empty
+          row.unshift(basename)
+          csv << row
+        end
+      end
+      csv.close
+    end
+  end
+end

data/lib/structured_csv/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module StructuredCsv
-  VERSION = "0.1.0"
+  VERSION = "0.1.1"
 end

data/structured_csv.gemspec CHANGED Viewed

@@ -8,12 +8,12 @@ Gem::Specification.new do |spec|
   spec.authors       = ["Ribose Inc."]
   spec.email         = ["open.source@ribose.com"]
-  spec.summary       = "Library to process structured CSV files"
-  spec.description   = "Library to process structured CSV files"
-  spec.homepage      = "https://open.ribose.com"
-  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
+  spec.summary               = "Library to process structured CSV files"
+  spec.description           = "Library to process structured CSV files"
+  spec.homepage              = "https://open.ribose.com"
+  spec.required_ruby_version = Gem::Requirement.new(">= 2.6.7")
-  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["homepage_uri"]    = spec.homepage
   spec.metadata["source_code_uri"] = "https://github.com/riboseinc/structured_csv"
   # spec.metadata["changelog_uri"] = "TODO: Put your gem's CHANGELOG.md URL here."
@@ -31,4 +31,15 @@ Gem::Specification.new do |spec|
   # For more information and examples about making a new gem, checkout our
   # guide at: https://bundler.io/guides/creating_gem.html
+  spec.add_dependency "csv", "~> 3.1"
+  # spec.add_dependency "pathname", "~> 0.1"
+  spec.add_dependency "yaml", "~> 0.1"
+  spec.add_development_dependency "byebug", "~> 11.1"
+  spec.add_development_dependency "guard", "~> 2.17"
+  spec.add_development_dependency "guard-rspec", "~> 4.7"
+  spec.add_development_dependency "rake", "~> 13.0"
+  spec.add_development_dependency "rspec", "~> 3.10"
+  spec.add_development_dependency "simplecov", "~> 0.21"
 end