importu 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/README.md +163 -0
- data/importu.gemspec +29 -0
- data/lib/importu.rb +12 -0
- data/lib/importu/converters.rb +82 -0
- data/lib/importu/core_ext.rb +3 -0
- data/lib/importu/core_ext/array/deep_freeze.rb +7 -0
- data/lib/importu/core_ext/deep_freeze.rb +3 -0
- data/lib/importu/core_ext/hash/deep_freeze.rb +7 -0
- data/lib/importu/core_ext/object/deep_freeze.rb +6 -0
- data/lib/importu/dsl.rb +127 -0
- data/lib/importu/exceptions.rb +34 -0
- data/lib/importu/importer.rb +119 -0
- data/lib/importu/importer/csv.rb +52 -0
- data/lib/importu/importer/json.rb +45 -0
- data/lib/importu/importer/xml.rb +55 -0
- data/lib/importu/record.rb +124 -0
- data/lib/importu/version.rb +3 -0
- data/spec/factories/importer.rb +12 -0
- data/spec/factories/importer_record.rb +13 -0
- data/spec/factories/json_importer.rb +14 -0
- data/spec/factories/xml_importer.rb +12 -0
- data/spec/lib/importu/converters_spec.rb +276 -0
- data/spec/lib/importu/dsl_spec.rb +26 -0
- data/spec/lib/importu/exceptions_spec.rb +96 -0
- data/spec/lib/importu/importer/json_spec.rb +37 -0
- data/spec/lib/importu/importer/xml_spec.rb +14 -0
- data/spec/lib/importu/record_spec.rb +123 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/support/matchers/delegate_matcher.rb +42 -0
- metadata +218 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
## Overview
|
2
|
+
Importu is a framework and DSL for simplifying the process of importing
|
3
|
+
structured data into your application. It is also a tool for separating
|
4
|
+
import-related business logic from the rest of your code.
|
5
|
+
|
6
|
+
Currently supported source formats include CSV/TSV, XML and JSON. It is fairly
|
7
|
+
trivial to extend Importu to handle additional formats. See the
|
8
|
+
`lib/importu/importer` directory for implementations of supported importers.
|
9
|
+
|
10
|
+
The current version of Importu depends on both ActiveRecord and ActiveSupport,
|
11
|
+
which will become optional in a future release.
|
12
|
+
|
13
|
+
## Example
|
14
|
+
**Please read the tutorial in the
|
15
|
+
[importu-examples](https://github.com/dhedlund/importu-examples) repository for
|
16
|
+
a more complete overview of available features.**
|
17
|
+
|
18
|
+
Assuming you have the following data in the file `data.csv`:
|
19
|
+
```
|
20
|
+
"isbn10","title","author","release_date","pages"
|
21
|
+
"0596516177","The Ruby Programming Language","David Flanagan and Yukihiro Matsumoto","Feb 1, 2008","448"
|
22
|
+
"1449355978","Computer Science Programming Basics in Ruby","Ophir Frieder, Gideon Frieder and David Grossman","May 1, 2013","188"
|
23
|
+
"0596523696","Ruby Cookbook"," Lucas Carlson and Leonard Richardson","Jul 26, 2006","910"
|
24
|
+
```
|
25
|
+
|
26
|
+
You can create a minimal importer to read the CSV data:
|
27
|
+
```ruby
|
28
|
+
class BookImporter < Importu::Importer::Csv
|
29
|
+
# fields we expect to find in the CSV file, field order is not important
|
30
|
+
fields :title, :author, :isbn10, :pages, :release_date
|
31
|
+
end
|
32
|
+
```
|
33
|
+
|
34
|
+
And then load that data in your application:
|
35
|
+
```ruby
|
36
|
+
require 'importu'
|
37
|
+
|
38
|
+
filename = File.expand_path('../data.csv', __FILE__)
|
39
|
+
importer = BookImporter.new(filename)
|
40
|
+
|
41
|
+
# importer.records returns an Enumerable
|
42
|
+
importer.records.count # => 3
|
43
|
+
importer.records.select {|r| r[:author] =~ /Matsumoto/ }.count # => 1
|
44
|
+
importer.records.each do |record|
|
45
|
+
# ...
|
46
|
+
end
|
47
|
+
|
48
|
+
importer.records.map(&:to_hash)
|
49
|
+
```
|
50
|
+
|
51
|
+
A more complete example of the book importer above might look like the following:
|
52
|
+
```ruby
|
53
|
+
require 'importu'
|
54
|
+
|
55
|
+
class BookImporter < Importu::Importer::Csv
|
56
|
+
# if you want to define multiple fields with similar rules, use 'fields'
|
57
|
+
# NOTE: ':required => true' is redundant in this example; any defined
|
58
|
+
# fields must have a corresponding column in the source data by default
|
59
|
+
fields :title, :isbn10, :authors, :required => true
|
60
|
+
|
61
|
+
# to mark a field as optional in the source data
|
62
|
+
field :pages, :required => false
|
63
|
+
|
64
|
+
# you can reference the same field multiple times and apply rules
|
65
|
+
# incrementally; this provides a lot of flexibility in describing your
|
66
|
+
# importer rules, such as grouping all the required fields together and
|
67
|
+
# explicitly stating that "these are required"; the importer becomes the
|
68
|
+
# reference document:
|
69
|
+
#
|
70
|
+
# fields :title, :isbn10, :authors, :release_date, :required => true
|
71
|
+
# fields :pages, :required => false
|
72
|
+
#
|
73
|
+
# ...or keep all the rules for that field with that field, whatever makes
|
74
|
+
# sense for your particular use case.
|
75
|
+
|
76
|
+
# if your field is not named the same as the source data, you can use
|
77
|
+
# :label => '...' to reference the correct field, where the label is what
|
78
|
+
# the field is labelled in the source data
|
79
|
+
field :authors, :label => 'author'
|
80
|
+
|
81
|
+
# you can convert fields using one of the built-in converters
|
82
|
+
field :pages, &convert_to(:integer)
|
83
|
+
field :release_date, &convert_to(:date) # date format is guessed
|
84
|
+
|
85
|
+
# some converters allow you to pass additional arguments; in the case of
|
86
|
+
# the date converter, you can pass an explicit format and it will raise an
|
87
|
+
# error if a date is encountered that doesn't match
|
88
|
+
field :release_date, &convert_to(:date, :format => '%b %d, %Y')
|
89
|
+
|
90
|
+
# passing a block to a field definition allows you to add your own logic
|
91
|
+
# for converting data or checking for unexpected values
|
92
|
+
field :authors do
|
93
|
+
value = clean(:authors) # apply :clean converter which strips whitespace
|
94
|
+
authors = value ? value.split(/(?:, )|(?: and )|(?: & )/i) : []
|
95
|
+
|
96
|
+
if authors.none?
|
97
|
+
# ArgumentError will be converted to an Importu::FieldParseError, which
|
98
|
+
# will include the name of the field affected
|
99
|
+
raise ArgumentError, "at least one author is required"
|
100
|
+
end
|
101
|
+
|
102
|
+
authors
|
103
|
+
end
|
104
|
+
|
105
|
+
# abstract fields that are not part of the original data set can be created
|
106
|
+
field :by_matz, :abstract => true do
|
107
|
+
# field conversion rules can reference other fields; the field value is
|
108
|
+
# what would be returned after referenced field's rules have been applied
|
109
|
+
field_value(:authors).include?('Yukihiro Matsumoto')
|
110
|
+
end
|
111
|
+
end
|
112
|
+
```
|
113
|
+
|
114
|
+
A more condensed version of the above, with all the rules grouped into individual field definitions:
|
115
|
+
```ruby
|
116
|
+
class BookImporter < Importu::Importer::Csv
|
117
|
+
fields :title, :isbn10
|
118
|
+
|
119
|
+
field :authors, :label => 'author' do
|
120
|
+
authors = clean(:authors).to_s.split(/(?:, )|(?: and )|(?: & )/i)
|
121
|
+
raise ArgumentError, "at least one author is required" if authors.none?
|
122
|
+
|
123
|
+
authors
|
124
|
+
end
|
125
|
+
|
126
|
+
field :pages, :required => false, &convert_to(:integer)
|
127
|
+
field :release_date, &convert_to(:date, :format => '%b %d, %Y')
|
128
|
+
|
129
|
+
field :by_matz, :abstract => true do
|
130
|
+
field_value(:authors).include?('Yukihiro Matsumoto')
|
131
|
+
end
|
132
|
+
end
|
133
|
+
```
|
134
|
+
|
135
|
+
### Rails / ActiveRecord
|
136
|
+
If you define a model in the importer definition and the importer fields are
|
137
|
+
named the same as the attributes in your model, Importu can iterate through and
|
138
|
+
create or update records for you:
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
class BookImporter < Importu::Importer::Csv
|
142
|
+
model 'Book'
|
143
|
+
|
144
|
+
# ...
|
145
|
+
end
|
146
|
+
|
147
|
+
filename = File.expand_path('../data.csv', __FILE__)
|
148
|
+
importer = BookImporter.new(filename)
|
149
|
+
|
150
|
+
importer.import!
|
151
|
+
|
152
|
+
importer.total # => 3
|
153
|
+
importer.invalid # => 0
|
154
|
+
importer.created # => 3
|
155
|
+
importer.updated # => 0
|
156
|
+
importer.unchanged # => 0
|
157
|
+
|
158
|
+
importer.import!
|
159
|
+
|
160
|
+
importer.total # => 3
|
161
|
+
importer.created # => 0
|
162
|
+
importer.unchanged # => 3
|
163
|
+
```
|
data/importu.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
|
2
|
+
require 'importu/version'
|
3
|
+
|
4
|
+
# Gem packaging metadata for importu. The file lists are taken from git, so
# the gem must be built from inside a git checkout.
Gem::Specification.new do |spec|
  # -- identity ------------------------------------------------------------
  spec.name        = 'importu'
  spec.version     = Importu::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.authors     = ['Daniel Hedlund']
  spec.email       = ['daniel@lincracy.com']
  spec.homepage    = 'https://github.com/lincracy/importu'
  spec.summary     = 'A framework for importing data'
  spec.description = 'Importu is a framework for importing data'
  spec.licenses    = ['MIT']

  # -- packaged files ------------------------------------------------------
  tracked_files = `git ls-files`
  tracked_tests = `git ls-files -- {test,spec,features}/*`

  spec.files         = tracked_files.split("\n")
  spec.test_files    = tracked_tests.split("\n")
  spec.require_paths = ['lib']

  # -- runtime dependencies (name followed by optional requirements) -------
  [
    ['activesupport', '>= 3.0.0'],
    ['activerecord',  '>= 3.0.0'],
    ['multi_json',    '~> 1.0'],
    ['nokogiri']
  ].each do |dependency|
    spec.add_dependency(*dependency)
  end

  # -- development-only dependencies ---------------------------------------
  [
    ['bundler',      '>= 1.0.0'],
    ['rspec',        '>= 0'],
    ['rdoc',         '>= 0'],
    ['factory_girl', '>= 3.5.0']
  ].each do |dependency|
    spec.add_development_dependency(*dependency)
  end
end
|
data/lib/importu.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
module Importu; end
|
2
|
+
|
3
|
+
require 'importu/core_ext'
|
4
|
+
|
5
|
+
require 'importu/dsl'
|
6
|
+
require 'importu/exceptions'
|
7
|
+
require 'importu/converters'
|
8
|
+
require 'importu/record'
|
9
|
+
require 'importu/importer'
|
10
|
+
require 'importu/importer/csv'
|
11
|
+
require 'importu/importer/json'
|
12
|
+
require 'importu/importer/xml'
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
require 'active_support/core_ext/date_time/conversions'
|
3
|
+
require 'active_support/concern'
|
4
|
+
|
5
|
+
require 'bigdecimal'
|
6
|
+
|
7
|
+
# Built-in converters, registered through the `converter` class macro on any
# class that includes this module.
#
# Every converter block receives the field name and an options hash. The
# blocks call `definitions`, `data` and `convert`, none of which are defined
# here; presumably they are evaluated in the context of a record that
# provides them -- confirm against the caller.
module Importu::Converters
  extend ActiveSupport::Concern

  included do
    # Looks up the untouched source value stored under the field's :label.
    # Raises a RuntimeError for an undefined field and Importu::MissingField
    # when the source data has no entry for the label.
    converter :raw do |name, options|
      definition = definitions[name]
      raise "importer field not defined: #{name}" unless definition

      label = definition[:label]
      raise Importu::MissingField, definition unless data.key?(label)

      data[label]
    end

    # Strips surrounding whitespace from strings and maps blank strings to
    # nil; values that are not strings are passed through unchanged.
    converter :clean do |name, options|
      value = convert(name, :raw, options)
      next value unless value.is_a?(String)

      value.blank? ? nil : value.strip
    end

    # Cleaned value as a String; nil stays nil.
    converter :string do |name, options|
      cleaned = convert(name, :clean, options)
      cleaned.try(:to_s)
    end

    # Cleaned value coerced with Kernel#Integer; nil stays nil.
    converter :integer do |name, options|
      cleaned = convert(name, :clean, options)
      Integer(cleaned) unless cleaned.nil?
    end

    # Cleaned value coerced with Kernel#Float; nil stays nil.
    converter :float do |name, options|
      cleaned = convert(name, :clean, options)
      Float(cleaned) unless cleaned.nil?
    end

    # Cleaned value as a BigDecimal. nil and existing BigDecimals are passed
    # through; anything else must match a plain decimal literal such as
    # "-12" or "3.50", otherwise ArgumentError is raised.
    converter :decimal do |name, options|
      value = convert(name, :clean, options)

      case value
      when nil, BigDecimal
        value
      when /\A-?\d+(?:\.\d+)?\Z/
        BigDecimal(value)
      else
        raise ArgumentError, "invalid decimal value '#{value}'"
      end
    end

    # Cleaned value as true/false. Only the listed literals are recognised;
    # nil stays nil and anything else raises ArgumentError.
    converter :boolean do |name, options|
      value = convert(name, :clean, options)
      next nil if value.nil?

      case value
      when true, 'true', 'yes', '1', 1
        true
      when false, 'false', 'no', '0', 0
        false
      else
        raise ArgumentError, "invalid boolean value '#{value}'"
      end
    end

    # Cleaned value parsed as a Date. An explicit strptime pattern may be
    # supplied via :format (or the deprecated :date_format); without one the
    # format is guessed by Date.parse.
    converter :date do |name, options|
      value = convert(name, :clean, options)
      next nil unless value

      # TODO: options[:date_format] is deprecated
      pattern = options[:date_format] || options[:format]

      if pattern
        Date.strptime(value, pattern)
      else
        Date.parse(value)
      end
    end

    # Same as :date, but yields a DateTime on which `utc` is called.
    converter :datetime do |name, options|
      value = convert(name, :clean, options)
      next nil unless value

      # TODO: options[:date_format] is deprecated
      pattern = options[:date_format] || options[:format]

      parsed = if pattern
        DateTime.strptime(value, pattern)
      else
        DateTime.parse(value)
      end

      parsed.utc
    end
  end
end
|
data/lib/importu/dsl.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'active_support/core_ext/module/delegation'
|
2
|
+
require 'active_support/core_ext/hash/deep_dup'
|
3
|
+
require 'active_support/core_ext/hash/keys'
|
4
|
+
require 'active_support/concern'
|
5
|
+
|
6
|
+
require 'importu/core_ext/deep_freeze'
|
7
|
+
|
8
|
+
# importer definition examples:
|
9
|
+
# allow_actions :create
|
10
|
+
# allow_actions :create, :update
|
11
|
+
#
|
12
|
+
# find_by :id # match against a single field, :id (default)
|
13
|
+
# find_by [:name, :date] # match against multiple fields
|
14
|
+
# find_by :id, [:name, :date] # try name/date combo if no id match
|
15
|
+
# find_by nil # never try to look up records, assume :create
|
16
|
+
# find_by do |record|
|
17
|
+
# scoped.where(:foo => record[:name].downcase)
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# field :field1, :label => 'Field 1'
|
21
|
+
# fields :field1, :field2, :field3
|
22
|
+
# fields :field1, :field2, convert_to(:integer)
|
23
|
+
# fields :field1, :field2 do |data,definition|
|
24
|
+
# Time.strptime(data[definition[:label]], '%d/%m/%Y')
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# allow actions:
|
28
|
+
# :create - if an existing record can't be found, we can create it
|
29
|
+
# :update - if an existing record found, update its attributes
|
30
|
+
#
|
31
|
+
# field(s) definition options:
|
32
|
+
# :label - header/label/key/element name used in input file (default: field name)
|
33
|
+
# :required - must be present in input file (values can be blank, default: true)
|
34
|
+
|
35
|
+
require 'active_support/concern'
|
36
|
+
|
37
|
+
# Class-level DSL for describing an importer: which fields it reads, how
# their values are converted, which actions are allowed and how existing
# records are looked up.
#
# Settings declared with +config_dsl+ are stored in class-level instance
# variables. A class that has not set a value itself reads it from its
# superclass, so importer subclasses inherit and may override settings.
module Importu::Dsl
  extend ActiveSupport::Concern

  included do
    config_dsl :record_class, :default => Importu::Record
    config_dsl :model, :description
    config_dsl :allowed_actions, :default => [:create]
    config_dsl :finder_fields, :default => [[:id]]
    config_dsl :definitions, :default => {}
    config_dsl :preprocessor, :postprocessor
    config_dsl :converters, :default => {}
  end

  module ClassMethods
    # Replaces the list of allowed actions with the ones given
    # (e.g. :create, :update).
    def allow_actions(*actions)
      @allowed_actions = actions
    end

    # Sets how existing records are looked up. When a block is given it
    # becomes the only finder. Otherwise each argument is one group of field
    # names: single names are wrapped into an array and nil groups are
    # dropped, so `find_by nil` leaves no finder at all.
    def find_by(*field_groups, &block)
      @finder_fields =
        block ? [block] : field_groups.map { |group| group && [*group] }.compact
    end

    # Defines or extends one or more field definitions.
    #
    # Accepts field names, optionally followed by a converter Proc and/or an
    # options hash, plus an optional block used as the converter. Calling it
    # again for a field that is already defined merges the new options into
    # the existing definition. Always returns nil.
    def fields(*fields, &block)
      # a converter may be passed positionally, e.g. `fields :a, convert_to(:integer)`
      block = fields.pop if fields.last.kind_of?(Proc)
      options = fields.extract_options!.symbolize_keys!

      # copy inherited definitions the first time this class adds its own
      @definitions ||= definitions.deep_dup
      fields.compact.each do |field_name|
        definition = (@definitions[field_name] || {}).merge(options)

        definition[:name] = field_name
        # A :label passed in options is already present through the merge
        # above; option keys were symbolized, so only the field name remains
        # as a fallback.
        definition[:label] ||= field_name.to_s
        definition[:required] = true unless definition.key?(:required)
        definition[:create] = true unless definition.key?(:create)
        definition[:update] = true unless definition.key?(:update)

        definition[:converter] = block if block
        definition[:converter] ||= converters[:clean]

        @definitions[field_name] = definition
      end

      # explicit nil so the result of `each` is not returned
      return
    end

    alias_method :field, :fields

    def preprocess(&block)
      # gets executed just before record converted to object
      @preprocessor = block
    end

    def postprocess(&block)
      # gets executed just after record converted to object
      @postprocessor = block
    end

    # Registers a converter block under the given name. The converters hash
    # is replaced rather than mutated, so the hash visible to other classes
    # is left untouched.
    def converter(name, &block)
      @converters = converters.merge(name => block)
    end

    # Returns the converter registered for +type+, for use as a field
    # converter: `field :pages, &convert_to(:integer)`.
    #
    # When +options+ are given (e.g. `:format => '%b %d, %Y'`) the converter
    # is wrapped so that those options are merged over the options it is
    # called with; previously they were silently discarded. Without options
    # the registered converter itself is returned, and nil is returned for an
    # unknown type.
    #
    # NOTE(review): the wrapper assumes converters are run with
    # instance_exec(name, options), which matches the |name,options| blocks
    # of the built-in converters -- confirm against the record implementation.
    def convert_to(type, options = {})
      converter = converters[type] # FIXME: raise error if not found?
      return converter if converter.nil? || options.nil? || options.empty?

      proc do |name, field_options|
        instance_exec(name, (field_options || {}).merge(options), &converter)
      end
    end

    # Declares one or more configuration settings.
    #
    # For every name a class-level method is defined that acts as a writer
    # when called with an argument or a block (the block is instance_eval'd
    # and its result stored) and as a reader otherwise. Stored values and the
    # optional :default are deep-frozen. Instance-level methods of the same
    # names are defined as well, delegating to the object's singleton class.
    def config_dsl(*methods)
      options = methods.extract_options!
      options.assert_valid_keys(:default)
      default = (options[:default] || nil).deep_freeze

      methods.each do |m|
        instance_variable_set("@#{m}", default)

        singleton_class.send(:define_method, m) do |*args, &block|
          if block || !args.empty?
            val = (block ? instance_eval(&block) : args[0])
            instance_variable_set("@#{m}", val.deep_freeze)
          elsif instance_variable_defined?("@#{m}")
            instance_variable_get("@#{m}")
          else
            # not set on this class: use the inherited value
            superclass.send(m)
          end
        end
      end

      # make dsl methods available to importer instances
      delegate(*methods, :to => :singleton_class)
    end
  end

end
|