activerecord-fast-import2 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ nbproject/private
7
+ .idea
8
+ activerecord-fast-import-0.2.0.gem
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Jan Suchal
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,75 @@
1
+ # activerecord-fast-import
2
+
3
+ Loads data from text files into tables using MySQL's fast native [LOAD DATA INFILE](http://dev.mysql.com/doc/refman/5.1/en/load-data.html) statement.
4
+
5
+ ## Examples
6
+
7
+ ### Loading data from tab delimited log file
8
+
9
+ Suppose you have an ActiveRecord model LogEntry defined as `LogEntry happened_at:datetime url:string` and a log file with tab delimited columns like this:
10
+
11
+ 2009-09-30 12:32:43<tab>http://github.com/
12
+ 2009-09-30 13:36:13<tab>http://facebook.com/
13
+
14
+ To import data from this log file, you have to use
15
+
16
+ LogEntry.fast_import('huge.log')
17
+
18
+ That's it!
19
+
20
+ Of course, in the real world you will also need more advanced features. Read on...
21
+
22
+
23
+ ### Changing delimiters and ignoring some rows
24
+
25
+ Of course, not all log files are delimited by tabs and newlines. Just pass custom delimiters in the options hash. If you want to skip the first 10 lines of the file, use `:ignore_lines`:
26
+
27
+ import_options = {
28
+ :fields_terminated_by => ',',
29
+ :lines_terminated_by => ';',
30
+ :ignore_lines => 10
31
+ }
32
+
33
+ ### Changing order of columns and ignoring columns
34
+
35
+ Now, imagine you want to import data from a huge log file with the following format:
36
+
37
+ http://github.com/<tab>Mozilla<tab>2009-09-30 12:32:43
38
+ http://facebook.com/<tab>Opera<tab>2009-09-30 13:36:13
39
+
40
+ The columns are clearly in a different order, and we also want to ignore the second column. Let's do it:
41
+
42
+ import_options = {:columns => ["url", "@dummy", "happened_at"]}
43
+ LogEntry.fast_import('huge.log', import_options)
44
+
45
+ The special `@dummy` name loads that column into a MySQL user variable; because the variable is not used in any transformation, its value is simply ignored.
46
+
47
+ ### Transforming data
48
+
49
+ Now imagine we have a log file like this:
50
+
51
+ 2009-09-30 12:32:43<tab>http://github.com/<tab>image.jpg
52
+ 2009-09-30 13:36:13<tab>http://facebook.com/<tab>styles/default.css
53
+
54
+ We want to concatenate those two columns into one.
55
+
56
+ import_options = {
57
+ :columns => ["happened_at", "@domain", "@file"],
58
+ :mapping => { :url => "CONCAT(@domain, @file)" }
59
+ }
60
+ LogEntry.fast_import('huge.log', import_options)
61
+
62
+ Of course you can use any of those shiny [MySQL functions](http://dev.mysql.com/doc/refman/5.1/en/functions.html).
63
+
64
+ ### Extra features - added by Rafal Piekarski
65
+
66
+ This version also supports other "LOAD DATA" features, available as keys in the options hash:
67
+
68
+ :insert_method # defines method for inserting data: IGNORE or REPLACE
69
+ :charset_name # defines file charset
70
+ :fields_optionally_enclosed_by # allows you to define fields enclosure method
71
+ :fields_escaped_by # changes the escape character
72
+
73
+ ## Copyright
74
+
75
+ Copyright (c) 2009 Jan Suchal. See LICENSE for details.
@@ -0,0 +1,49 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks are provided by Jeweler. When Jeweler is not installed
# the remaining tasks (spec, rcov, rdoc) are still defined and usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "activerecord-fast-import"
    gem.summary = "Fast MySQL import for ActiveRecord"
    gem.description = "Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE."
    gem.email = "johno@jsmf.net"
    gem.homepage = "http://github.com/jsuchal/activerecord-fast-import"
    gem.authors = ["Jan Suchal"]
    gem.add_development_dependency "rspec"
    gem.add_dependency 'activerecord', [">= 2.1.2"]
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'spec/rake/spectask'

# Runs every *_spec.rb file under spec/.
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Runs the same specs with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is only declared when Jeweler loaded above. Depending on
# it unconditionally would make `rake spec` (and the default task) abort on an
# unknown prerequisite in the LoadError path, so guard the dependency.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # The VERSION file is optional; fall back to an empty version string.
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "activerecord-fast-import #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
@@ -0,0 +1,58 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
# -*- encoding: utf-8 -*-
#
# NOTE: the RubyGems version check below uses Gem::VERSION instead of the
# generator's Gem::RubyGemsVersion, which no longer exists in current
# RubyGems. Regenerating this file with an old jeweler will undo that.

Gem::Specification.new do |s|
  s.name = %q{activerecord-fast-import2}
  s.version = "0.2.1"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Sidereel Development"]
  s.date = %q{2010-05-02}
  s.description = %q{Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE.}
  s.email = %q{engineering@sidereel.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.markdown"
  ]
  s.files = [
    ".gitignore",
    "LICENSE",
    "README.markdown",
    "Rakefile",
    "VERSION",
    "activerecord-fast-import.gemspec",
    "lib/activerecord-fast-import.rb",
    "nbproject/project.properties",
    "nbproject/project.xml",
    "spec/activerecord-fast-import_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/sidereel/activerecord-fast-import}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Fast MySQL import for ActiveRecord}
  s.test_files = [
    "spec/activerecord-fast-import_spec.rb",
    "spec/spec_helper.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # RubyGems >= 1.2.0 distinguishes development from runtime dependencies;
    # older versions only know plain dependencies.
    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<rspec>, [">= 0"])
      s.add_runtime_dependency(%q<activerecord>, [">= 2.1.2"])
    else
      s.add_dependency(%q<rspec>, [">= 0"])
      s.add_dependency(%q<activerecord>, [">= 2.1.2"])
    end
  else
    s.add_dependency(%q<rspec>, [">= 0"])
    s.add_dependency(%q<activerecord>, [">= 2.1.2"])
  end
end
58
+
@@ -0,0 +1,87 @@
1
module ActiveRecord #:nodoc:
  class Base
    # Deletes all rows in the model's table with a single TRUNCATE TABLE
    # statement. Very fast, but neither +destroy+ nor any hooks are called.
    def self.truncate_table
      connection.execute("TRUNCATE TABLE #{quoted_table_name}")
    end

    # Disables key updates for the model's table (ALTER TABLE ... DISABLE KEYS).
    def self.disable_keys
      connection.execute("ALTER TABLE #{quoted_table_name} DISABLE KEYS")
    end

    # Enables key updates for the model's table (ALTER TABLE ... ENABLE KEYS).
    def self.enable_keys
      connection.execute("ALTER TABLE #{quoted_table_name} ENABLE KEYS")
    end

    # Disables keys, yields to the block, then enables keys again.
    #
    # Keys are re-enabled in an +ensure+ clause, so a block that raises does
    # not leave the table with its keys disabled; the exception still
    # propagates to the caller. On success the result of +enable_keys+ is
    # returned.
    def self.with_keys_disabled
      disable_keys
      begin
        yield
      ensure
        enable_result = enable_keys
      end
      enable_result
    end

    # Loads data from file(s) using MySQL native LOAD DATA INFILE query, disabling
    # key updates for even faster import speed
    #
    # ==== Parameters
    # * +files+ file(s) to import; a single value is wrapped in an array
    # * +options+ (see <tt>load_data_infile</tt>)
    def self.fast_import(files, options = {})
      files = [files] unless files.is_a?(Array)
      with_keys_disabled do
        load_data_infile_multiple(files, options)
      end
    end

    # Loads data from multiple files using MySQL native LOAD DATA INFILE query,
    # issuing one query per file with the same +options+.
    def self.load_data_infile_multiple(files, options = {})
      files.each do |file|
        load_data_infile(file, options)
      end
    end

    # Loads data from file using MySQL native LOAD DATA INFILE query
    #
    # ==== Parameters
    # * +file+ the file to import
    # * +options+ hash of optional clauses:
    #   * <tt>:insert_method</tt> - keyword placed before INTO TABLE
    #   * <tt>:charset_name</tt> - CHARACTER SET of the file
    #   * <tt>:fields_terminated_by</tt> - field delimiter (String, or Integer
    #     character code emitted as a hex literal)
    #   * <tt>:fields_optionally_enclosed_by</tt> - field enclosure character
    #   * <tt>:fields_escaped_by</tt> - escape character
    #   * <tt>:lines_terminated_by</tt> - line delimiter (String or Integer)
    #   * <tt>:ignore_lines</tt> - number of leading lines to skip
    #   * <tt>:columns</tt> - array of column / @variable names, in file order
    #   * <tt>:mapping</tt> - hash of column => SQL expression for the SET clause
    #
    # NOTE: +file+ and all option values are interpolated into the SQL text
    # verbatim (no escaping is applied), so they must not come from untrusted
    # input and must already be valid inside their SQL context.
    def self.load_data_infile(file, options = {})
      sql = "LOAD DATA LOCAL INFILE '#{file}' "
      sql << "#{options[:insert_method]} " if options[:insert_method]
      sql << "INTO TABLE #{quoted_table_name} "
      sql << "CHARACTER SET #{options[:charset_name]} " if options[:charset_name]

      # Each sub-clause carries its own trailing space so the generated SQL
      # text stays identical to what earlier releases produced.
      field_clauses = []
      field_clauses << "TERMINATED BY #{structure_string(options[:fields_terminated_by])} " if options[:fields_terminated_by]
      field_clauses << "OPTIONALLY ENCLOSED BY '#{options[:fields_optionally_enclosed_by]}' " if options[:fields_optionally_enclosed_by]
      field_clauses << "ESCAPED BY '#{options[:fields_escaped_by]}' " if options[:fields_escaped_by]

      sql << "FIELDS #{field_clauses.join} " unless field_clauses.empty?
      sql << "LINES TERMINATED BY #{structure_string(options[:lines_terminated_by])} " if options[:lines_terminated_by]
      sql << "IGNORE #{options[:ignore_lines]} LINES " if options[:ignore_lines]
      sql << "(#{options[:columns].join(', ')}) " if options[:columns]
      if options[:mapping]
        assignments = options[:mapping].map { |column, expression| "#{column} = #{expression}" }
        sql << "SET #{assignments.join(', ')} " unless assignments.empty?
      end
      sql << ";"
      connection.execute(sql)
    end

    # Renders a delimiter for use in a FIELDS/LINES clause: an Integer becomes
    # a hex literal (e.g. 9 => 0x9), anything else a single-quoted string.
    def self.structure_string(spec)
      if spec.is_a?(Integer)
        "0x%x" % spec
      else
        "'#{spec}'"
      end
    end
    # A bare +private+ does not apply to methods defined with +def self.+,
    # so the helper is hidden explicitly.
    private_class_method :structure_string
  end
end
@@ -0,0 +1,6 @@
1
+ javac.classpath=
2
+ main.file=
3
+ platform.active=Ruby
4
+ source.encoding=UTF-8
5
+ src.lib.dir=lib
6
+ test.spec.dir=spec
@@ -0,0 +1,15 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project xmlns="http://www.netbeans.org/ns/project/1">
3
+ <type>org.netbeans.modules.ruby.rubyproject</type>
4
+ <configuration>
5
+ <data xmlns="http://www.netbeans.org/ns/ruby-project/1">
6
+ <name>activerecord-fast-import</name>
7
+ <source-roots>
8
+ <root id="src.lib.dir"/>
9
+ </source-roots>
10
+ <test-roots>
11
+ <root id="test.spec.dir"/>
12
+ </test-roots>
13
+ </data>
14
+ </configuration>
15
+ </project>
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "ActiverecordFastImport" do
4
+ it "fails" do
5
+ fail "hey buddy, you should probably rename this file and start specing for real"
6
+ end
7
+ end
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'activerecord-fast-import'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: activerecord-fast-import2
3
+ version: !ruby/object:Gem::Version
4
+ hash: 21
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 2
9
+ - 1
10
+ version: 0.2.1
11
+ platform: ruby
12
+ authors:
13
+ - Sidereel Development
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-05-02 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: activerecord
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 15
43
+ segments:
44
+ - 2
45
+ - 1
46
+ - 2
47
+ version: 2.1.2
48
+ type: :runtime
49
+ version_requirements: *id002
50
+ description: Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE.
51
+ email: engineering@sidereel.com
52
+ executables: []
53
+
54
+ extensions: []
55
+
56
+ extra_rdoc_files:
57
+ - LICENSE
58
+ - README.markdown
59
+ files:
60
+ - .gitignore
61
+ - LICENSE
62
+ - README.markdown
63
+ - Rakefile
64
+ - VERSION
65
+ - activerecord-fast-import.gemspec
66
+ - lib/activerecord-fast-import.rb
67
+ - nbproject/project.properties
68
+ - nbproject/project.xml
69
+ - spec/activerecord-fast-import_spec.rb
70
+ - spec/spec_helper.rb
71
+ homepage: http://github.com/sidereel/activerecord-fast-import
72
+ licenses: []
73
+
74
+ post_install_message:
75
+ rdoc_options:
76
+ - --charset=UTF-8
77
+ require_paths:
78
+ - lib
79
+ required_ruby_version: !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ hash: 3
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ required_rubygems_version: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ hash: 3
94
+ segments:
95
+ - 0
96
+ version: "0"
97
+ requirements: []
98
+
99
+ rubyforge_project:
100
+ rubygems_version: 1.8.11
101
+ signing_key:
102
+ specification_version: 3
103
+ summary: Fast MySQL import for ActiveRecord
104
+ test_files:
105
+ - spec/activerecord-fast-import_spec.rb
106
+ - spec/spec_helper.rb