activerecord-fast-import2 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ nbproject/private
7
+ .idea
8
+ activerecord-fast-import-0.2.0.gem
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Jan Suchal
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,75 @@
1
+ # activerecord-fast-import
2
+
3
+ Loads data from text files into tables using fast native MySQL [LOAD DATA INFILE](http://dev.mysql.com/doc/refman/5.1/en/load-data.html) query.
4
+
5
+ ## Examples
6
+
7
+ ### Loading data from tab delimited log file
8
+
9
+ Suppose you have an ActiveRecord model LogEntry defined as `LogEntry happened_at:datetime url:string` and a log file with tab delimited columns like this:
10
+
11
+ 2009-09-30 12:32:43<tab>http://github.com/
12
+ 2009-09-30 13:36:13<tab>http://facebook.com/
13
+
14
+ To import data from this log file, just call:
15
+
16
+ LogEntry.fast_import('huge.log')
17
+
18
+ That's it!
19
+
20
+ Of course, in the real world you will also need more advanced features. Read on...
21
+
22
+
23
+ ### Changing delimiters and ignoring some rows
24
+
25
+ Not all log files are delimited by tabs and newlines. Pass custom delimiters in the options hash (`:fields_terminated_by`, `:lines_terminated_by`), and use `:ignore_lines` to skip leading lines, for example the first 10:
26
+
27
+ import_options = {
28
+ :fields_terminated_by => ',',
29
+ :lines_terminated_by => ';',
30
+ :ignore_lines => 10
31
+ }
32
+
33
+ ### Changing order of columns and ignoring columns
34
+
35
+ Now, imagine you want to import data from a huge log file with the following format:
36
+
37
+ http://github.com/<tab>Mozilla<tab>2009-09-30 12:32:43
38
+ http://facebook.com/<tab>Opera<tab>2009-09-30 13:36:13
39
+
40
+ The columns are in a different order than in the table, and we want to ignore the second column altogether. Let's do it:
41
+
42
+ import_options = {:columns => ["url", "@dummy", "happened_at"]}
43
+ LogEntry.fast_import('huge.log', import_options)
44
+
45
+ The special `@dummy` name loads that column into a MySQL user variable; because no transformation uses it, the value is simply ignored.
46
+
47
+ ### Transforming data
48
+
49
+ Now imagine we have a log file like this:
50
+
51
+ 2009-09-30 12:32:43<tab>http://github.com/<tab>image.jpg
52
+ 2009-09-30 13:36:13<tab>http://facebook.com/<tab>styles/default.css
53
+
54
+ We want to concatenate the last two columns into a single `url` value.
55
+
56
+ import_options = {
57
+ :columns => ["happened_at", "@domain", "@file"],
58
+ :mapping => { :url => "CONCAT(@domain, @file)" }
59
+ }
60
+ LogEntry.fast_import('huge.log', import_options)
61
+
62
+ Of course you can use any of those shiny [MySQL functions](http://dev.mysql.com/doc/refman/5.1/en/functions.html).
63
+
64
+ ### Extra features - added by Rafal Piekarski
65
+
66
+ This version also supports further `LOAD DATA` clauses, set through these keys of the options hash:
67
+
68
+ :insert_method # defines method for inserting data: IGNORE or REPLACE
69
+ :charset_name # defines file charset
70
+ :fields_optionally_enclosed_by # allows you to define fields enclosure method
71
+ :fields_escaped_by # changes the escape character
72
+
73
+ ## Copyright
74
+
75
+ Copyright (c) 2009 Jan Suchal. See LICENSE for details.
@@ -0,0 +1,49 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ # Gem packaging: configure Jeweler's tasks; if Jeweler cannot be loaded, print an install hint instead.
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "activerecord-fast-import"
8
+ gem.summary = "Fast MySQL import for ActiveRecord"
9
+ gem.description = "Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE."
10
+ gem.email = "johno@jsmf.net"
11
+ gem.homepage = "http://github.com/jsuchal/activerecord-fast-import"
12
+ gem.authors = ["Jan Suchal"]
13
+ gem.add_development_dependency "rspec"
14
+ gem.add_dependency 'activerecord', [">= 2.1.2"]
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
19
+ end
20
+ # Spec tasks: :spec and :rcov both target spec/**/*_spec.rb with lib and spec on the load path; :rcov also sets rcov = true.
21
+ require 'spec/rake/spectask'
22
+ Spec::Rake::SpecTask.new(:spec) do |spec|
23
+ spec.libs << 'lib' << 'spec'
24
+ spec.spec_files = FileList['spec/**/*_spec.rb']
25
+ end
26
+
27
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
28
+ spec.libs << 'lib' << 'spec'
29
+ spec.pattern = 'spec/**/*_spec.rb'
30
+ spec.rcov = true
31
+ end
32
+ # NOTE(review): :check_dependencies is not defined in this file - presumably supplied by Jeweler::Tasks; confirm :spec still works when Jeweler fails to load.
33
+ task :spec => :check_dependencies
34
+
35
+ task :default => :spec
36
+ # RDoc task: documents README* and lib/**/*.rb into the rdoc directory, titled with the VERSION file contents when that file exists.
37
+ require 'rake/rdoctask'
38
+ Rake::RDocTask.new do |rdoc|
39
+ if File.exist?('VERSION')
40
+ version = File.read('VERSION') # raw file contents, interpolated as-is into the title below
41
+ else
42
+ version = ""
43
+ end
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "activerecord-fast-import #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
@@ -0,0 +1,58 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+ # NOTE(review): this spec declares activerecord-fast-import2 0.2.1, while the Rakefile's Jeweler block names the gem activerecord-fast-import and VERSION reads 0.2.0 - regenerating would presumably overwrite these values; confirm intent.
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{activerecord-fast-import2}
8
+ s.version = "0.2.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Sidereel Development"]
12
+ s.date = %q{2010-05-02}
13
+ s.description = %q{Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE.}
14
+ s.email = %q{engineering@sidereel.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.markdown"
18
+ ]
19
+ s.files = [
20
+ ".gitignore",
21
+ "LICENSE",
22
+ "README.markdown",
23
+ "Rakefile",
24
+ "VERSION",
25
+ "activerecord-fast-import.gemspec",
26
+ "lib/activerecord-fast-import.rb",
27
+ "nbproject/project.properties",
28
+ "nbproject/project.xml",
29
+ "spec/activerecord-fast-import_spec.rb",
30
+ "spec/spec_helper.rb"
31
+ ]
32
+ s.homepage = %q{http://github.com/sidereel/activerecord-fast-import}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{Fast MySQL import for ActiveRecord}
37
+ s.test_files = [
38
+ "spec/activerecord-fast-import_spec.rb",
39
+ "spec/spec_helper.rb"
40
+ ]
41
+ # Dependencies: typed development/runtime declarations when the spec supports specification_version and RubyGems is >= 1.2.0; plain add_dependency for both gems otherwise.
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION # NOTE(review): assigned but never read in this file
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_development_dependency(%q<rspec>, [">= 0"])
48
+ s.add_runtime_dependency(%q<activerecord>, [">= 2.1.2"])
49
+ else
50
+ s.add_dependency(%q<rspec>, [">= 0"])
51
+ s.add_dependency(%q<activerecord>, [">= 2.1.2"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<rspec>, [">= 0"])
55
+ s.add_dependency(%q<activerecord>, [">= 2.1.2"])
56
+ end
57
+ end
58
+
@@ -0,0 +1,87 @@
module ActiveRecord #:nodoc:
  # MySQL-specific bulk loading helpers, added as class methods so that every
  # model can call them (e.g. <tt>LogEntry.fast_import('huge.log')</tt>).
  #
  # All methods build a SQL statement around +quoted_table_name+ and send it
  # through +connection.execute+.
  class Base
    # Deletes all rows in table very fast, but without calling +destroy+ method
    # nor any hooks.
    def self.truncate_table
      connection.execute("TRUNCATE TABLE #{quoted_table_name}")
    end

    # Disables key updates for model table
    def self.disable_keys
      connection.execute("ALTER TABLE #{quoted_table_name} DISABLE KEYS")
    end

    # Enables key updates for model table
    def self.enable_keys
      connection.execute("ALTER TABLE #{quoted_table_name} ENABLE KEYS")
    end

    # Disables keys, yields to the block, then enables keys again.
    #
    # Keys are re-enabled from an +ensure+ clause, so an exception raised
    # inside the block does not leave the table with its keys disabled; the
    # exception still propagates to the caller. Returns the value of the block.
    def self.with_keys_disabled
      disable_keys
      begin
        yield
      ensure
        enable_keys
      end
    end

    # Loads data from file(s) using MySQL native LOAD DATA INFILE query, disabling
    # key updates for even faster import speed
    #
    # ==== Parameters
    # * +files+ file(s) to import; a single path is wrapped into an array
    # * +options+ (see <tt>load_data_infile</tt>)
    def self.fast_import(files, options = {})
      files = [files] unless files.is_a?(Array)
      with_keys_disabled do
        load_data_infile_multiple(files, options)
      end
    end

    # Loads data from multiple files using MySQL native LOAD DATA INFILE query.
    # Each file is loaded with its own statement, all sharing the same +options+.
    def self.load_data_infile_multiple(files, options = {})
      files.each do |file|
        load_data_infile(file, options)
      end
    end

    # Loads data from file using MySQL native LOAD DATA INFILE query
    #
    # ==== Parameters
    # * +file+ the file to import; its path is escaped with
    #   <tt>connection.quote_string</tt> before being embedded in the statement
    # * +options+ hash; every key is optional:
    #   * <tt>:insert_method</tt> keyword placed before INTO TABLE (README: IGNORE or REPLACE)
    #   * <tt>:charset_name</tt> value for the CHARACTER SET clause
    #   * <tt>:fields_terminated_by</tt> String, or Integer emitted as a hex literal
    #   * <tt>:fields_optionally_enclosed_by</tt> enclosure character
    #   * <tt>:fields_escaped_by</tt> escape character
    #   * <tt>:lines_terminated_by</tt> String, or Integer emitted as a hex literal
    #   * <tt>:ignore_lines</tt> number of leading lines to skip
    #   * <tt>:columns</tt> array of column / @variable names, in file order
    #   * <tt>:mapping</tt> hash of column => SQL expression for the SET clause
    #
    # Apart from the file path, option values are inserted into the statement
    # unescaped, so they must come from trusted code, never from user input.
    def self.load_data_infile(file, options = {})
      # Escape the path so quotes or backslashes in it cannot break the statement.
      sql = "LOAD DATA LOCAL INFILE '#{connection.quote_string(file.to_s)}' "
      sql << "#{options[:insert_method]} " if options[:insert_method]
      sql << "INTO TABLE #{quoted_table_name} "
      sql << "CHARACTER SET #{options[:charset_name]} " if options[:charset_name]

      field_clauses = []
      field_clauses << "TERMINATED BY #{structure_string(options[:fields_terminated_by])} " if options[:fields_terminated_by]
      field_clauses << "OPTIONALLY ENCLOSED BY '#{options[:fields_optionally_enclosed_by]}' " if options[:fields_optionally_enclosed_by]
      field_clauses << "ESCAPED BY '#{options[:fields_escaped_by]}' " if options[:fields_escaped_by]

      sql << "FIELDS #{field_clauses.join} " unless field_clauses.empty?
      sql << "LINES TERMINATED BY #{structure_string(options[:lines_terminated_by])} " if options[:lines_terminated_by]
      sql << "IGNORE #{options[:ignore_lines]} LINES " if options[:ignore_lines]

      # An empty column list would produce "()", which is not valid SQL; skip it.
      columns = options[:columns]
      sql << "(#{columns.join(', ')}) " if columns && !columns.empty?

      if options[:mapping]
        assignments = options[:mapping].map { |column, expression| "#{column} = #{expression}" }
        sql << "SET #{assignments.join(', ')} " unless assignments.empty?
      end

      sql << ";"
      connection.execute(sql)
    end

    # Internal helper: renders a delimiter for a FIELDS / LINES clause.
    # Integers become hex literals (9 => 0x9); anything else is wrapped in
    # single quotes verbatim.
    #
    # The method stays callable from outside for backward compatibility: the
    # bare +private+ that used to precede it had no effect on a
    # <tt>def self.</tt> method. +Integer+ replaces +Fixnum+, which no longer
    # exists on current Rubies.
    def self.structure_string(spec)
      if spec.is_a?(Integer)
        "0x%x" % spec
      else
        "'#{spec}'"
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ javac.classpath=
2
+ main.file=
3
+ platform.active=Ruby
4
+ source.encoding=UTF-8
5
+ src.lib.dir=lib
6
+ test.spec.dir=spec
@@ -0,0 +1,15 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project xmlns="http://www.netbeans.org/ns/project/1">
3
+ <type>org.netbeans.modules.ruby.rubyproject</type>
4
+ <configuration>
5
+ <data xmlns="http://www.netbeans.org/ns/ruby-project/1">
6
+ <name>activerecord-fast-import</name>
7
+ <source-roots>
8
+ <root id="src.lib.dir"/>
9
+ </source-roots>
10
+ <test-roots>
11
+ <root id="test.spec.dir"/>
12
+ </test-roots>
13
+ </data>
14
+ </configuration>
15
+ </project>
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ # Placeholder example group: its only example calls `fail` unconditionally, so this spec file cannot pass until real examples replace it.
3
+ describe "ActiverecordFastImport" do
4
+ it "fails" do
5
+ fail "hey buddy, you should probably rename this file and start specing for real"
6
+ end
7
+ end
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) # make this spec directory requirable
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) # make the sibling lib directory requirable
3
+ require 'activerecord-fast-import'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+ # Runner configuration hook; the block is empty, so no options are set here.
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: activerecord-fast-import2
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 1
10
+ version: 0.2.1
11
+ platform: ruby
12
+ authors:
13
+ - Sidereel Development
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-02 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: activerecord
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 15
43
+ segments:
44
+ - 2
45
+ - 1
46
+ - 2
47
+ version: 2.1.2
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ description: Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE.
51
+ email: engineering@sidereel.com
52
+ executables: []
53
+
54
+ extensions: []
55
+
56
+ extra_rdoc_files:
57
+ - LICENSE
58
+ - README.markdown
59
+ files:
60
+ - .gitignore
61
+ - LICENSE
62
+ - README.markdown
63
+ - Rakefile
64
+ - VERSION
65
+ - activerecord-fast-import.gemspec
66
+ - lib/activerecord-fast-import.rb
67
+ - nbproject/project.properties
68
+ - nbproject/project.xml
69
+ - spec/activerecord-fast-import_spec.rb
70
+ - spec/spec_helper.rb
71
+ homepage: http://github.com/sidereel/activerecord-fast-import
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options:
76
+ - --charset=UTF-8
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project:
100
+ rubygems_version: 1.8.11
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Fast MySQL import for ActiveRecord
104
+ test_files:
105
+ - spec/activerecord-fast-import_spec.rb
106
+ - spec/spec_helper.rb