importu 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/README.md +163 -0
- data/importu.gemspec +29 -0
- data/lib/importu.rb +12 -0
- data/lib/importu/converters.rb +82 -0
- data/lib/importu/core_ext.rb +3 -0
- data/lib/importu/core_ext/array/deep_freeze.rb +7 -0
- data/lib/importu/core_ext/deep_freeze.rb +3 -0
- data/lib/importu/core_ext/hash/deep_freeze.rb +7 -0
- data/lib/importu/core_ext/object/deep_freeze.rb +6 -0
- data/lib/importu/dsl.rb +127 -0
- data/lib/importu/exceptions.rb +34 -0
- data/lib/importu/importer.rb +119 -0
- data/lib/importu/importer/csv.rb +52 -0
- data/lib/importu/importer/json.rb +45 -0
- data/lib/importu/importer/xml.rb +55 -0
- data/lib/importu/record.rb +124 -0
- data/lib/importu/version.rb +3 -0
- data/spec/factories/importer.rb +12 -0
- data/spec/factories/importer_record.rb +13 -0
- data/spec/factories/json_importer.rb +14 -0
- data/spec/factories/xml_importer.rb +12 -0
- data/spec/lib/importu/converters_spec.rb +276 -0
- data/spec/lib/importu/dsl_spec.rb +26 -0
- data/spec/lib/importu/exceptions_spec.rb +96 -0
- data/spec/lib/importu/importer/json_spec.rb +37 -0
- data/spec/lib/importu/importer/xml_spec.rb +14 -0
- data/spec/lib/importu/record_spec.rb +123 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/support/matchers/delegate_matcher.rb +42 -0
- metadata +218 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
## Overview
|
2
|
+
Importu is a framework and DSL for simplifying the process of importing
|
3
|
+
structured data into your application. It is also a tool for separating
|
4
|
+
import-related business logic from the rest of your code.
|
5
|
+
|
6
|
+
Currently supported source formats include CSV/TSV, XML and JSON. It is fairly
|
7
|
+
trivial to extend Importu to handle additional formats. See the
|
8
|
+
`lib/importu/importer` directory for implementations of supported importers.
|
9
|
+
|
10
|
+
The current version of Importu depends on both ActiveRecord and ActiveSupport,
|
11
|
+
which will become optional in a future release.
|
12
|
+
|
13
|
+
## Example
|
14
|
+
**Please read the tutorial in the
|
15
|
+
[importu-examples](https://github.com/dhedlund/importu-examples) repository for
|
16
|
+
a more complete overview of available features.**
|
17
|
+
|
18
|
+
Assuming you have the following data in the file `data.csv`:
|
19
|
+
```
|
20
|
+
"isbn10","title","author","release_date","pages"
|
21
|
+
"0596516177","The Ruby Programming Language","David Flanagan and Yukihiro Matsumoto","Feb 1, 2008","448"
|
22
|
+
"1449355978","Computer Science Programming Basics in Ruby","Ophir Frieder, Gideon Frieder and David Grossman","May 1, 2013","188"
|
23
|
+
"0596523696","Ruby Cookbook"," Lucas Carlson and Leonard Richardson","Jul 26, 2006","910"
|
24
|
+
```
|
25
|
+
|
26
|
+
You can create a minimal importer to read the CSV data:
|
27
|
+
```ruby
|
28
|
+
class BookImporter < Importu::Importer::Csv
|
29
|
+
# fields we expect to find in the CSV file, field order is not important
|
30
|
+
fields :title, :author, :isbn10, :pages, :release_date
|
31
|
+
end
|
32
|
+
```
|
33
|
+
|
34
|
+
And then load that data in your application:
|
35
|
+
```ruby
|
36
|
+
require 'importu'
|
37
|
+
|
38
|
+
filename = File.expand_path('../data.csv', __FILE__)
|
39
|
+
importer = BookImporter.new(filename)
|
40
|
+
|
41
|
+
# importer.records returns an Enumerable
|
42
|
+
importer.records.count # => 3
|
43
|
+
importer.records.select {|r| r[:author] =~ /Matsumoto/ }.count # => 1
|
44
|
+
importer.records.each do |record|
|
45
|
+
# ...
|
46
|
+
end
|
47
|
+
|
48
|
+
importer.records.map(&:to_hash)
|
49
|
+
```
|
50
|
+
|
51
|
+
A more complete example of the book importer above might look like the following:
|
52
|
+
```ruby
|
53
|
+
require 'importu'
|
54
|
+
|
55
|
+
class BookImporter < Importu::Importer::Csv
|
56
|
+
# if you want to define multiple fields with similar rules, use 'fields'
|
57
|
+
# NOTE: ':required => true' is redundant in this example; any defined
|
58
|
+
# fields must have a corresponding column in the source data by default
|
59
|
+
fields :title, :isbn10, :authors, :required => true
|
60
|
+
|
61
|
+
# to mark a field as optional in the source data
|
62
|
+
field :pages, :required => false
|
63
|
+
|
64
|
+
# you can reference the same field multiple times and apply rules
|
65
|
+
# incrementally; this provides a lot of flexibility in describing your
|
66
|
+
# importer rules, such as grouping all the required fields together and
|
67
|
+
# explicitly stating that "these are required"; the importer becomes the
|
68
|
+
# reference document:
|
69
|
+
#
|
70
|
+
# fields :title, :isbn10, :authors, :release_date, :required => true
|
71
|
+
# fields :pages, :required => false
|
72
|
+
#
|
73
|
+
# ...or keep all the rules for that field with that field, whatever makes
|
74
|
+
# sense for your particular use case.
|
75
|
+
|
76
|
+
# if your field is not named the same as the source data, you can use
|
77
|
+
# :label => '...' to reference the correct field, where the label is what
|
78
|
+
# the field is labelled in the source data
|
79
|
+
field :authors, :label => 'author'
|
80
|
+
|
81
|
+
# you can convert fields using one of the built-in converters
|
82
|
+
field :pages, &convert_to(:integer)
|
83
|
+
field :release_date, &convert_to(:date) # date format is guessed
|
84
|
+
|
85
|
+
# some converters allow you to pass additional arguments; in the case of
|
86
|
+
# the date converter, you can pass an explicit format and it will raise an
|
87
|
+
# error if a date is encountered that doesn't match
|
88
|
+
field :release_date, &convert_to(:date, :format => '%b %d, %Y')
|
89
|
+
|
90
|
+
# passing a block to a field definition allows you to add your own logic
|
91
|
+
# for converting data or checking for unexpected values
|
92
|
+
field :authors do
|
93
|
+
value = clean(:authors) # apply :clean converter which strips whitespace
|
94
|
+
authors = value ? value.split(/(?:, )|(?: and )|(?: & )/i) : []
|
95
|
+
|
96
|
+
if authors.none?
|
97
|
+
# ArgumentError will be converted to an Importu::FieldParseError, which
|
98
|
+
# will include the name of the field affected
|
99
|
+
raise ArgumentError, "at least one author is required"
|
100
|
+
end
|
101
|
+
|
102
|
+
authors
|
103
|
+
end
|
104
|
+
|
105
|
+
# abstract fields that are not part of the original data set can be created
|
106
|
+
field :by_matz, :abstract => true do
|
107
|
+
# field conversion rules can reference other fields; the field value is
|
108
|
+
# what would be returned after referenced field's rules have been applied
|
109
|
+
field_value(:authors).include?('Yukihiro Matsumoto')
|
110
|
+
end
|
111
|
+
end
|
112
|
+
```
|
113
|
+
|
114
|
+
A more condensed version of the above, with all the rules grouped into individual field definitions:
|
115
|
+
```ruby
|
116
|
+
class BookImporter < Importu::Importer::Csv
|
117
|
+
fields :title, :isbn10
|
118
|
+
|
119
|
+
field :authors, :label => 'author' do
|
120
|
+
authors = clean(:authors).to_s.split(/(?:, )|(?: and )|(?: & )/i)
|
121
|
+
raise ArgumentError, "at least one author is required" if authors.none?
|
122
|
+
|
123
|
+
authors
|
124
|
+
end
|
125
|
+
|
126
|
+
field :pages, :required => false, &convert_to(:integer)
|
127
|
+
field :release_date, &convert_to(:date, :format => '%b %d, %Y')
|
128
|
+
|
129
|
+
field :by_matz, :abstract => true do
|
130
|
+
field_value(:authors).include?('Yukihiro Matsumoto')
|
131
|
+
end
|
132
|
+
end
|
133
|
+
```
|
134
|
+
|
135
|
+
### Rails / ActiveRecord
|
136
|
+
If you define a model in the importer definition and the importer fields are
|
137
|
+
named the same as the attributes in your model, Importu can iterate through and
|
138
|
+
create or update records for you:
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
class BookImporter < Importu::Importer::Csv
|
142
|
+
model 'Book'
|
143
|
+
|
144
|
+
# ...
|
145
|
+
end
|
146
|
+
|
147
|
+
filename = File.expand_path('../data.csv', __FILE__)
|
148
|
+
importer = BookImporter.new(filename)
|
149
|
+
|
150
|
+
importer.import!
|
151
|
+
|
152
|
+
importer.total # => 3
|
153
|
+
importer.invalid # => 0
|
154
|
+
importer.created # => 3
|
155
|
+
importer.updated # => 0
|
156
|
+
importer.unchanged # => 0
|
157
|
+
|
158
|
+
importer.import!
|
159
|
+
|
160
|
+
importer.total # => 3
|
161
|
+
importer.created # => 0
|
162
|
+
importer.unchanged # => 3
|
163
|
+
```
|
data/importu.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Gem specification for importu. lib/ is put on the load path first so the
# version constant can be read from importu/version.
$LOAD_PATH.unshift(File.expand_path('../lib', __FILE__))
require 'importu/version'

Gem::Specification.new do |spec|
  # identity
  spec.name        = 'importu'
  spec.version     = Importu::VERSION
  spec.platform    = Gem::Platform::RUBY
  spec.licenses    = ['MIT']

  # authorship and description
  spec.authors     = ['Daniel Hedlund']
  spec.email       = ['daniel@lincracy.com']
  spec.homepage    = 'https://github.com/lincracy/importu'
  spec.summary     = 'A framework for importing data'
  spec.description = 'Importu is a framework for importing data'

  # packaged files are whatever git currently tracks
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.require_paths = ['lib']

  # runtime dependencies
  spec.add_dependency('activesupport', '>= 3.0.0')
  spec.add_dependency('activerecord',  '>= 3.0.0')
  spec.add_dependency('multi_json',    '~> 1.0')
  spec.add_dependency('nokogiri')

  # development-only dependencies
  spec.add_development_dependency('bundler',      '>= 1.0.0')
  spec.add_development_dependency('rspec',        '>= 0')
  spec.add_development_dependency('rdoc',         '>= 0')
  spec.add_development_dependency('factory_girl', '>= 3.5.0')
end
|
data/lib/importu.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
module Importu; end
|
2
|
+
|
3
|
+
require 'importu/core_ext'
|
4
|
+
|
5
|
+
require 'importu/dsl'
|
6
|
+
require 'importu/exceptions'
|
7
|
+
require 'importu/converters'
|
8
|
+
require 'importu/record'
|
9
|
+
require 'importu/importer'
|
10
|
+
require 'importu/importer/csv'
|
11
|
+
require 'importu/importer/json'
|
12
|
+
require 'importu/importer/xml'
|
@@ -0,0 +1,82 @@
|
|
1
|
+
require 'active_support/core_ext/object/blank'
|
2
|
+
require 'active_support/core_ext/date_time/conversions'
|
3
|
+
require 'active_support/concern'
|
4
|
+
|
5
|
+
require 'bigdecimal'
|
6
|
+
|
7
|
+
# Built-in field converters.
#
# Including this module registers each converter below on the including
# class through its `converter` DSL method. Every converter block receives
# the field name and an options hash. The blocks call `definitions`, `data`
# and `convert`, none of which are defined in this file.
# NOTE(review): those helpers are presumably provided by the object the
# block is evaluated against (e.g. a record) -- confirm against the caller.
module Importu::Converters
  extend ActiveSupport::Concern

  included do
    # :raw -- the untouched source value stored under the field's label.
    # Raises a RuntimeError if the field was never defined, and
    # Importu::MissingField if the source data has no key for the label.
    converter :raw do |name, options|
      definition = definitions[name] ||
        raise("importer field not defined: #{name}")

      label = definition[:label]
      raise Importu::MissingField, definition unless data.key?(label)

      data[label]
    end

    # :clean -- strings are stripped and blank strings become nil; any
    # non-string value is passed through unchanged.
    converter :clean do |name, options|
      value = convert(name, :raw, options)

      if value.is_a?(String)
        value.blank? ? nil : value.strip
      else
        value
      end
    end

    # :string -- cleaned value coerced with #to_s; nil stays nil.
    # A plain nil check is used instead of Object#try so this file does not
    # depend on an ActiveSupport extension it never requires itself.
    converter :string do |name, options|
      value = convert(name, :clean, options)
      value.nil? ? nil : value.to_s
    end

    # :integer -- strict Kernel#Integer conversion; nil stays nil.
    converter :integer do |name, options|
      value = convert(name, :clean, options)
      value.nil? ? nil : Integer(value)
    end

    # :float -- strict Kernel#Float conversion; nil stays nil.
    converter :float do |name, options|
      value = convert(name, :clean, options)
      value.nil? ? nil : Float(value)
    end

    # :decimal -- accepts an existing BigDecimal or a plain decimal string
    # (optional leading minus, optional fractional part); anything else
    # raises ArgumentError.
    converter :decimal do |name, options|
      value = convert(name, :clean, options)

      case value
      when nil then nil
      when BigDecimal then value
      when /\A-?\d+(?:\.\d+)?\z/ then BigDecimal(value)
      else raise ArgumentError, "invalid decimal value '#{value}'"
      end
    end

    # :boolean -- maps a fixed set of truthy/falsy tokens (matched exactly,
    # case-sensitively); anything else raises ArgumentError.
    converter :boolean do |name, options|
      value = convert(name, :clean, options)

      case value
      when nil then nil
      when true, 'true', 'yes', '1', 1 then true
      when false, 'false', 'no', '0', 0 then false
      else raise ArgumentError, "invalid boolean value '#{value}'"
      end
    end

    # :date -- parsed with Date.strptime when a format option is given,
    # otherwise with Date.parse. Returns nil when the cleaned value is nil.
    converter :date do |name, options|
      if (value = convert(name, :clean, options))
        # TODO: options[:date_format] is deprecated
        pattern = options[:date_format] || options[:format]
        pattern ? Date.strptime(value, pattern) : Date.parse(value)
      end
    end

    # :datetime -- same rules as :date but yields a DateTime with #utc
    # applied. Returns nil when the cleaned value is nil.
    converter :datetime do |name, options|
      if (value = convert(name, :clean, options))
        # TODO: options[:date_format] is deprecated
        pattern = options[:date_format] || options[:format]
        parsed = pattern ? DateTime.strptime(value, pattern) : DateTime.parse(value)
        parsed.utc
      end
    end
  end
end
|
data/lib/importu/dsl.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'active_support/core_ext/module/delegation'
|
2
|
+
require 'active_support/core_ext/hash/deep_dup'
|
3
|
+
require 'active_support/core_ext/hash/keys'
|
4
|
+
require 'active_support/concern'
|
5
|
+
|
6
|
+
require 'importu/core_ext/deep_freeze'
|
7
|
+
|
8
|
+
# importer definition examples:
|
9
|
+
# allow_actions :create
|
10
|
+
# allow_actions :create, :update
|
11
|
+
#
|
12
|
+
# find_by :id # match against a single field, :id (default)
|
13
|
+
# find_by [:name, :date] # match against multiple fields
|
14
|
+
# find_by :id, [:name, :date] # try name/date combo if no id match
|
15
|
+
# find_by nil # never try to look up records, assume :create
|
16
|
+
# find_by do |record|
|
17
|
+
# scoped.where(:foo => record[:name].downcase)
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# field :field1, :label => 'Field 1'
|
21
|
+
# fields :field1, :field2, :field3
|
22
|
+
# fields :field1, :field2, convert_to(:integer)
|
23
|
+
# fields :field1, :field2 do |data,definition|
|
24
|
+
# Time.strptime(data[definition[:label]], '%d/%m/%Y')
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# allow actions:
|
28
|
+
# :create - if an existing record can't be found, we can create it
|
29
|
+
# :update - if an existing record found, update its attributes
|
30
|
+
#
|
31
|
+
# field(s) definition options:
|
32
|
+
# :label - header/label/key/element name used in input file (default: field name)
|
33
|
+
# :required - must be present in input file (values can be blank, default: true)
|
34
|
+
|
35
|
+
require 'active_support/concern'
|
36
|
+
|
37
|
+
# Class-level DSL for declaring an importer: target model, permitted
# actions, record lookup strategy, field definitions and converters.
module Importu::Dsl
  extend ActiveSupport::Concern

  included do
    config_dsl :record_class, :default => Importu::Record
    config_dsl :model, :description
    config_dsl :allowed_actions, :default => [:create]
    config_dsl :finder_fields, :default => [[:id]]
    config_dsl :definitions, :default => {}
    config_dsl :preprocessor, :postprocessor
    config_dsl :converters, :default => {}
  end

  module ClassMethods
    # Replaces the list of allowed actions (e.g. :create, :update).
    def allow_actions(*actions)
      @allowed_actions = actions
    end

    # Sets how existing records are looked up. With a block, the block is
    # the only finder. Otherwise each non-nil argument becomes one group of
    # field names (a single field is wrapped in an array); nil arguments are
    # dropped, so `find_by nil` leaves no finder groups at all.
    def find_by(*field_groups, &block)
      @finder_fields =
        if block
          [block]
        else
          field_groups.map { |group| group && [*group] }.compact
        end
    end

    # Defines one or more fields, or adds rules to fields already defined.
    #
    # fields - field names, optionally followed by an options hash and/or a
    #          converter Proc passed as the last positional argument
    # block  - converter for the listed fields (a trailing Proc argument
    #          takes precedence over it)
    #
    # Always returns nil.
    def fields(*fields, &block)
      block = fields.pop if fields.last.kind_of?(Proc)

      # non-destructive, so the hash object passed by the caller is left
      # untouched (and may be frozen)
      options = fields.extract_options!.symbolize_keys

      # start from a private copy of the inherited definitions the first
      # time this class defines a field
      @definitions ||= definitions.deep_dup

      fields.compact.each do |field_name|
        definition = (@definitions[field_name] || {}).merge(options)

        definition[:name] = field_name
        # an explicit :label option has already been merged in above
        definition[:label] ||= field_name.to_s

        [:required, :create, :update].each do |flag|
          definition[flag] = true unless definition.key?(flag)
        end

        definition[:converter] = block if block
        definition[:converter] ||= converters[:clean]

        @definitions[field_name] = definition
      end

      nil
    end

    alias_method :field, :fields

    # Stores the block to run just before a record is converted to an object.
    def preprocess(&block)
      @preprocessor = block
    end

    # Stores the block to run just after a record is converted to an object.
    def postprocess(&block)
      @postprocessor = block
    end

    # Registers a named converter. A new hash is built with #merge because
    # values handed to config_dsl are deep-frozen.
    def converter(name, &block)
      @converters = converters.merge(name => block)
    end

    # Looks up a registered converter for use as a field's conversion block.
    #
    # type    - name the converter was registered under
    # options - options to bind to the converter, e.g. :format for :date
    #
    # Returns the registered Proc itself when no options are given, or nil
    # when no converter is registered under `type`. When options are given,
    # returns a wrapper Proc that passes them on to the converter, with
    # call-time options overriding the bound ones.
    # NOTE(review): the wrapper assumes converters are invoked with
    # instance_exec(name, options), matching the |name, options| signature
    # of the built-in converters -- confirm against the code that runs them.
    def convert_to(type, options = {})
      converter = converters[type] # FIXME: raise error if not found?
      return converter if converter.nil? || options.nil? || options.empty?

      bound_options = options.dup
      proc do |name, call_options|
        instance_exec(name, bound_options.merge(call_options || {}), &converter)
      end
    end

    # Defines inheritable configuration accessors on the class.
    #
    # Each generated class method acts as a setter when called with an
    # argument or a block (the block is instance_eval'd and its result
    # stored), and as a getter otherwise. A getter falls back to the
    # superclass when this class has not set a value. Stored values are
    # deep-frozen. Same-named instance methods delegate to singleton_class.
    #
    # methods - accessor names, optionally followed by :default => value
    def config_dsl(*methods)
      options = methods.extract_options!
      options.assert_valid_keys(:default)
      default = (options[:default] || nil).deep_freeze

      methods.each do |m|
        ivar = "@#{m}"
        instance_variable_set(ivar, default)

        singleton_class.send(:define_method, m) do |*args, &block|
          if block || !args.empty?
            val = (block ? instance_eval(&block) : args[0])
            instance_variable_set(ivar, val.deep_freeze)
          elsif instance_variable_defined?(ivar)
            instance_variable_get(ivar)
          else
            superclass.send(m)
          end
        end
      end

      # make dsl methods available to importer instances
      delegate(*methods, :to => :singleton_class)
    end
  end
end
|