eatr 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
# Rake entry point: Bundler's gem packaging tasks plus an RSpec runner.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Registers the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# A bare `rake` runs the `spec` task.
task default: :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Development console: starts IRB with Bundler set up and this gem loaded.

require "bundler/setup"
# Load this gem's own entry point. The gemspec names the gem "eatr" and the
# library file is lib/eatr.rb, so that is the name that must be required here.
require "eatr"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start
data/bin/rspec ADDED
@@ -0,0 +1,17 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true
#
# This file was generated by Bundler.
#
# The application 'rspec' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"
# Unless the caller already set BUNDLE_GEMFILE, point Bundler at the Gemfile
# resolved relative to this script's real (symlink-resolved) path, i.e. the
# Gemfile in the directory above the one containing this script.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
  Pathname.new(__FILE__).realpath)

require "rubygems"
require "bundler/setup"

# Hand control to the `rspec` executable provided by the rspec-core gem.
load Gem.bin_path("rspec-core", "rspec")
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/eatr.gemspec ADDED
@@ -0,0 +1,30 @@
1
# coding: utf-8
# Gem specification for eatr: metadata, packaged files and dependencies.

# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'eatr/version'

Gem::Specification.new do |spec|
  spec.name    = "eatr"
  spec.version = Eatr::VERSION
  spec.authors = ["Greggory Rothmeier"]
  spec.email   = ["greggroth@gmail.com"]

  spec.summary     = "Configuration-based document parsing and transformation framework."
  spec.description = "Configuration-based document parsing and transformation framework."
  spec.homepage    = ""
  spec.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the base names of the packaged files under exe/.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency "nokogiri", "~> 1.6"

  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "pry", "~>0.10"
end
data/lib/eatr.rb ADDED
@@ -0,0 +1,11 @@
1
+ require "eatr/version"
2
+ require "eatr/parse_value"
3
+ require "eatr/schema"
4
+ require "eatr/xml/document"
5
+ require "eatr/csv/document"
6
+ require "eatr/pipeline"
7
+ require "eatr/transformation_set"
8
+ require "eatr/transformation/add_date_id"
9
+
10
# Root namespace of the gem. It is intentionally empty here; its classes and
# modules are defined by the files required at the top of this file.
module Eatr
end
@@ -0,0 +1,49 @@
1
require 'csv'
require 'forwardable'
require 'yaml'

module Eatr
  module Csv
    # Raised when a required field has neither a constant value in the schema
    # nor a value under its header in the CSV row.
    ValueNotFound = Class.new(StandardError)

    # Reads a CSV file and turns each row into a struct described by a schema.
    class Document
      include ParseValue
      extend Forwardable

      attr_reader :schema

      def_delegator :schema,
        :transformation_pipeline

      # @param schema_path [String] path to the YAML file describing the schema
      def initialize(schema_path)
        # NOTE(review): YAML.load is kept as-is; on older Psych versions it can
        # build arbitrary objects, so schema files must come from a trusted
        # source.
        @schema = Schema.new(YAML.load(File.read(schema_path)))
      end

      # Parses the CSV file, treating its first line as headers.
      #
      # @param csv_document_path [String] path to the CSV file
      # @return [Array] one schema struct per data row, in file order
      # @raise [ValueNotFound] when a required field has no value in a row
      def parse(csv_document_path)
        struct_class = @schema.to_struct
        objects = []

        CSV.foreach(csv_document_path, headers: true) do |row|
          record = struct_class.new
          @schema.fields.each do |field|
            record.public_send("#{field.name}=", value_at(row, field))
          end
          objects << record
        end

        objects
      end

      private

      # Resolves the value of one field for one row.
      #
      # A constant `value` in the schema takes precedence over the CSV
      # content. The check is against nil rather than truthiness so that a
      # constant `false` is honoured instead of being treated as absent.
      #
      # @return the constant, the parsed cell text, or nil when the cell is
      #   empty and the field is optional
      # @raise [ValueNotFound] when the cell is empty and the field is required
      def value_at(row, field)
        constant = field.value
        return constant unless constant.nil?

        text = row[field.csv_header]
        return parse_value(field, text) if text

        if field.required?
          raise ValueNotFound, "Unable to find '#{field.name}' with header '#{field.csv_header}'"
        end

        nil
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
require 'erb'
require 'yaml'

module Eatr
  # Renders an ERB template (by default the bundled dot_template.dot) against
  # a set of schemas loaded from YAML files.
  class DotGenerator
    # @return [Array<Schema>] the schemas loaded from the given paths
    attr_reader :tables

    DEFAULT_TEMPLATE_PATH = "#{File.dirname(__FILE__)}/dot_template.dot"

    # @param schema_paths [String, Array<String>] one path or a (possibly
    #   nested) list of paths to YAML schema files
    # @param template_path [String] ERB template to render
    def initialize(schema_paths, template_path: DEFAULT_TEMPLATE_PATH)
      # NOTE(review): YAML.load is kept as-is; schema files must be trusted.
      @tables = Array(schema_paths).flatten.map { |path| Schema.new(YAML.load(File.read(path))) }
      @template_path = template_path
    end

    # Evaluates the template in the context of this object, so the template
    # can call `tables` and the private helpers below.
    #
    # @return [String] the rendered template
    def to_dot
      build_template(File.read(@template_path)).result(binding)
    end

    private

    # Builds the ERB object with trim mode '-' (so `<%-` / `-%>` strip the
    # surrounding whitespace/newline). The positional form of ERB.new is
    # deprecated, so the keyword form is used whenever ERB supports it; the
    # positional form remains only as a fallback for ERB versions that predate
    # the keyword.
    def build_template(source)
      if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
        ERB.new(source, trim_mode: '-')
      else
        ERB.new(source, nil, '-')
      end
    end

    # @param belongs_to_str [String] reference of the form "table.column"
    # @return [Boolean] whether the referenced table is one of the loaded schemas
    def table_included?(belongs_to_str)
      table_name, = belongs_to_str.split('.')
      @tables.any? { |table| table.table_name == table_name }
    end

    # @param belongs_to_str [String] reference of the form "table.column"
    # @return [String] the reference as a quoted `"table":"column"` pair
    def arrow_target(belongs_to_str)
      table_name, column = belongs_to_str.split('.')
      "\"#{table_name}\":\"#{column}\""
    end
  end
end
@@ -0,0 +1,35 @@
1
<%# Graphviz template rendered by DotGenerator#to_dot: one HTML-label node per table with one port per flat field, -%>
<%# followed by one edge per belongs_to_one / has_many reference whose target table is among the rendered tables. -%>
strict digraph g {
ranksep="1.6"
graph [
rankdir = "LR"
];
node [
fontsize = "16"
];
edge [
arrowhead = "none"
];
<%- tables.each do |table| -%>
"<%= table.table_name %>" [shape=none, margin=0, label=<
<table border="0" cellborder="1" cellspacing="0" cellpadding="4">
<tr><td bgcolor="lightblue"><%= table.table_name %></td></tr>
<%- table.flat_fields.each do |field| -%>
<tr><td port="<%= field.name %>" align="left"><%= field.name %></td></tr>
<%- end -%>
</table>>];
<%- end -%>
<%- tables.each do |table| -%>
<%- table.flat_fields.each do |field| -%>
<%- if field.belongs_to_one && table_included?(field.belongs_to_one)-%>
"<%= table.table_name %>":"<%= field.name %>" -> <%= arrow_target(field.belongs_to_one) %> [arrowhead="tee<%= 'odot' if !field.required? %>"];
<%- end -%>
<%- if field.has_many -%>
<%- field.has_many.each do |r| -%>
<%- if table_included?(r)-%>
"<%= table.table_name %>":"<%= field.name %>" -> <%= arrow_target(r) %> [arrowhead="crow<%= 'odot' if !field.required? %>"];
<%- end -%>
<%- end -%>
<%- end -%>
<%- end -%>
<%- end -%>
}
@@ -0,0 +1,25 @@
1
require 'date'
require 'yaml'

module Eatr
  # Mixin that converts the raw text of a document value into a Ruby object
  # according to the type declared on its schema field.
  module ParseValue
    # @param field [#type, #strptime, #max_length, #length] schema field
    # @param text [String] raw text taken from the document
    # @return [Integer] for type 'integer' (String#to_i)
    # @return [Float] for type 'float' (String#to_f)
    # @return [DateTime] for type 'timestamp'; parsed with the field's
    #   `strptime` format when one is given, otherwise with DateTime.parse
    # @return [Object] for type 'boolean': whatever scalar the text denotes
    #   in YAML (e.g. "true" => true, "false" => false)
    # @return [String] for any other type: the text, cut to `max_length`
    #   (or, failing that, `length`) characters when one is set
    def parse_value(field, text)
      case field.type
      when 'integer' then text.to_i
      when 'float' then text.to_f
      when 'timestamp'
        format = field.strptime
        format ? DateTime.strptime(text, format) : DateTime.parse(text)
      when 'boolean'
        # The text comes from the parsed document, i.e. external input, so it
        # is read with safe_load: plain scalars such as true/false parse the
        # same way as before, but tagged YAML can no longer instantiate
        # arbitrary Ruby objects.
        YAML.safe_load(text)
      else
        limit = field.max_length || field.length
        limit ? text[0...limit] : text
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Eatr
  # Chains callables: the output of each step becomes the input of the next.
  class Pipeline
    # @param steps [Enumerable<#call>] steps to apply, in order
    def initialize(steps)
      @steps = steps
    end

    # @param row [Object] the initial value
    # @return [Object] the value returned by the last step, or `row` itself
    #   when there are no steps
    def call(row)
      @steps.inject(row) do |current, step|
        step.call(current)
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
require 'spec_helper'
require './lib/eatr/pipeline'

# Checks that a pipeline feeds its input through every step, in order.
describe Eatr::Pipeline do
  it 'performs each action on an object' do
    upcase = ->(str) { str.upcase }
    middle = ->(str) { str[1..3] }

    result = described_class.new([upcase, middle]).call("hello")

    expect(result).to eq("ELL")
  end
end
@@ -0,0 +1,94 @@
1
+ module Eatr
2
+ class Schema
3
+ class Field
4
+ def initialize(field_attributes)
5
+ @field_attributes = field_attributes
6
+ end
7
+
8
+ %w[
9
+ name
10
+ type
11
+ xpath
12
+ csv_header
13
+ strptime
14
+ value
15
+ max_length
16
+ length
17
+ belongs_to_one
18
+ has_many
19
+ ].each do |f|
20
+ define_method(f) do
21
+ @field_attributes[f]
22
+ end
23
+ end
24
+
25
+ def type
26
+ @field_attributes['type'].to_s.downcase
27
+ end
28
+
29
+ def node?
30
+ @field_attributes.has_key?('node')
31
+ end
32
+
33
+ def required?
34
+ @field_attributes.fetch('required', true)
35
+ end
36
+
37
+ def children
38
+ Array[*@field_attributes['children']].map { |f| Field.new(f) }
39
+ end
40
+ end
41
+
42
+ def initialize(schema_hash)
43
+ @schema = schema_hash
44
+ end
45
+
46
+ def fields
47
+ @fields ||= @schema.fetch('fields').map { |f| Field.new(f) }
48
+ end
49
+
50
+ def flat_fields
51
+ @flat_fields ||= fields.select(&:name).
52
+ concat(fields.flat_map(&:children))
53
+ end
54
+
55
+ def name
56
+ @schema.fetch('name', 'schema')
57
+ end
58
+
59
+ def table_name
60
+ @schema.fetch('table_name', name)
61
+ end
62
+
63
+ def remove_namespaces?
64
+ @schema.fetch('remove_namespaces', false)
65
+ end
66
+
67
+ def to_struct
68
+ @struct_klass ||= begin
69
+ Object.const_get("Struct::#{constant_name}")
70
+ rescue NameError
71
+ Struct.new(constant_name, *field_names)
72
+ end
73
+ end
74
+
75
+ def transformation_pipeline
76
+ Pipeline.new(TransformationSet.new(@schema.fetch('transformations', [])))
77
+ end
78
+
79
+ private
80
+
81
+ def constant_name
82
+ constantize(name)
83
+ end
84
+
85
+ def field_names
86
+ flat_fields.map { |f| f.name.to_sym }
87
+ end
88
+
89
+ def constantize(underscore_name)
90
+ underscore_name.split('_').map(&:capitalize).join
91
+ end
92
+
93
+ end
94
+ end
@@ -0,0 +1,52 @@
1
require 'yaml'

module Eatr
  module Sql
    # Builds a CREATE TABLE statement from a YAML schema file.
    class TableGenerator
      # SQL column types for the schema types that need no length handling.
      SCALAR_TYPES = {
        'integer' => 'INT',
        'float' => 'REAL',
        'timestamp' => 'TIMESTAMP',
        'boolean' => 'BOOLEAN'
      }.freeze

      # @param schema_path [String] path to the YAML schema file
      def initialize(schema_path)
        # NOTE(review): YAML.load is kept as-is; schema files must be trusted.
        @schema = Schema.new(YAML.load(File.read(schema_path)))
      end

      # @return [String] the CREATE TABLE statement, one column per flat
      #   field, terminated by a newline
      # @raise [ArgumentError] when a field declares a type with no SQL mapping
      def statement
        <<~STATEMENT
          CREATE TABLE #{@schema.table_name} (
          #{column_defs.join(",\n ")}
          );
        STATEMENT
      end

      private

      def column_defs
        @schema.flat_fields.map do |field|
          "#{field.name} #{type(field)}#{nullness(field)}"
        end
      end

      # Maps a field to its SQL column type. String-like fields (no type,
      # '' or 'string') become CHAR(n) when `length` is set, VARCHAR(n) when
      # `max_length` is set, and TEXT otherwise.
      #
      # An unrecognised type used to fall through and return nil, which
      # produced a column definition with no type at all; it is now reported
      # explicitly.
      def type(f)
        case f.type
        when nil, 'string', ''
          if f.length
            "CHAR(#{f.length})"
          elsif f.max_length
            "VARCHAR(#{f.max_length})"
          else
            'TEXT'
          end
        else
          SCALAR_TYPES.fetch(f.type) do
            raise ArgumentError, "Unknown type '#{f.type}' for field '#{f.name}'"
          end
        end
      end

      # @return [String] " NOT NULL" for required fields, "" otherwise
      def nullness(f)
        f.required? ? " NOT NULL" : ""
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Eatr
  module Transformation
    # Pipeline step that writes an integer of the form YYYYMMDD, derived from
    # one attribute of each object, into another attribute of the same object.
    class AddDateId
      # @param args [Hash] must contain 'source' (attribute to read) and
      #   'destination' (attribute to write)
      # @raise [KeyError] when either key is missing
      def initialize(args)
        @source = args.fetch('source')
        @destination = args.fetch('destination')
      end

      # @param objs [Array] objects, or nested arrays of objects, that support
      #   `[]` and `[]=` with the configured attribute names
      # @return [Array] the flattened objects; those whose source attribute is
      #   nil are passed through untouched
      def call(*objs)
        objs.flatten.each do |record|
          stamp = record[@source]
          next if stamp.nil?

          record[@destination] = stamp.strftime('%Y%m%d').to_i
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Eatr
  # Enumerable collection that turns transformation configs into
  # transformation objects.
  class TransformationSet
    include Enumerable

    # @param transformations [Array<Hash>] configs with a 'class' key (the
    #   constant name of the transformation) and an optional 'args' key
    def initialize(transformations)
      @transformations = transformations
    end

    # Yields each instantiated transformation in configuration order.
    # Without a block it returns an Enumerator instead of failing on `yield`.
    #
    # @return [Enumerator] when called without a block
    def each(&block)
      return enum_for(:each) unless block_given?

      to_a.each(&block)
    end

    # Instantiates every configured transformation; new objects are built on
    # each call.
    #
    # NOTE(review): the class name is resolved with Object.const_get straight
    # from the configuration, so configs must come from a trusted source.
    #
    # @return [Array] one instance per config, built with `new(args)` when
    #   'args' is present and `new` otherwise
    # @raise [KeyError] when a config has no 'class' key
    # @raise [NameError] when the named constant does not exist
    def to_a
      @transformations.map do |config|
        klass = Object.const_get(config.fetch('class'))
        args = config['args']

        args ? klass.new(args) : klass.new
      end
    end
  end
end