activerecord-fast-import 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ nbproject/private
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Jan Suchal
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,66 @@
1
+ # activerecord-fast-import
2
+
3
+ Loads data from text files into tables using fast native MySQL [LOAD DATA INFILE](http://dev.mysql.com/doc/refman/5.1/en/load-data.html) query.
4
+
5
+ ## Examples
6
+
7
+ ### Loading data from tab delimited log file
8
+
9
+ Suppose you have an ActiveRecord model LogEntry defined as `LogEntry happened_at:datetime url:string` and a log file with tab delimited columns like this:
10
+
11
+ 2009-09-30 12:32:43<tab>http://github.com/
12
+ 2009-09-30 13:36:13<tab>http://facebook.com/
13
+
14
+ To import data from this log file, you have to use
15
+
16
+ LogEntry.fast_import('huge.log')
17
+
18
+ That's it!
19
+
20
+ Of course in real world you will also need more advanced features. Read on...
21
+
22
+
23
+ ### Changing delimiters and ignoring some rows
24
+
25
+ Of course not all log files are delimited by tabs and newlines. Just pass custom delimiters to options. If you want to ignore first 10 lines, just use `:ignore_lines`
26
+
27
+ import_options = {
28
+ :fields_terminated_by => ',',
29
+ :lines_terminated_by => ';',
30
+ :ignore_lines => 10
31
+ }
32
+
33
+ ### Changing order of columns and ignoring columns
34
+
35
+ Now, imagine you want to import data from a huge log file with following format:
36
+
37
+ http://github.com/<tab>Mozilla<tab>2009-09-30 12:32:43
38
+ http://facebook.com/<tab>Opera<tab>2009-09-30 13:36:13
39
+
40
+ It is clear that columns are in different order and we even want to ignore the second column. Let's do it
41
+
42
+ import_options = {:columns => ["url", "@dummy", "happened_at"]}
43
+ LogEntry.fast_import('huge.log', import_options)
44
+
45
+ The special `@dummy` loads that column into a local variable and when unused (in a transformation) is just ignored.
46
+
47
+ ### Transforming data
48
+
49
+ Now imagine we have a log file like this:
50
+
51
+ 2009-09-30 12:32:43<tab>http://github.com/<tab>image.jpg
52
+ 2009-09-30 13:36:13<tab>http://facebook.com/<tab>styles/default.css
53
+
54
+ We want to concatenate those two columns into one.
55
+
56
+ import_options = {
57
+ :columns => ["happened_at", "@domain", "@file"],
58
+ :mapping => { :url => "CONCAT(@domain, @file)" }
59
+ }
60
+ LogEntry.fast_import('huge.log', import_options)
61
+
62
+ Of course you can use any of those shiny [MySQL functions](http://dev.mysql.com/doc/refman/5.1/en/functions.html).
63
+
64
+ ## Copyright
65
+
66
+ Copyright (c) 2009 Jan Suchal. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,49 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "activerecord-fast-import"
8
+ gem.summary = "Fast MySQL import for ActiveRecord"
9
+ gem.description = "Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE."
10
+ gem.email = "johno@jsmf.net"
11
+ gem.homepage = "http://github.com/jsuchal/activerecord-fast-import"
12
+ gem.authors = ["Jan Suchal"]
13
+ gem.add_development_dependency "rspec"
14
+ gem.add_dependency 'activerecord', [">= 2.1.2"]
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
19
+ end
20
+
21
+ require 'spec/rake/spectask'
22
+ Spec::Rake::SpecTask.new(:spec) do |spec|
23
+ spec.libs << 'lib' << 'spec'
24
+ spec.spec_files = FileList['spec/**/*_spec.rb']
25
+ end
26
+
27
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
28
+ spec.libs << 'lib' << 'spec'
29
+ spec.pattern = 'spec/**/*_spec.rb'
30
+ spec.rcov = true
31
+ end
32
+
33
+ task :spec => :check_dependencies
34
+
35
+ task :default => :spec
36
+
37
+ require 'rake/rdoctask'
38
+ Rake::RDocTask.new do |rdoc|
39
+ if File.exist?('VERSION')
40
+ version = File.read('VERSION')
41
+ else
42
+ version = ""
43
+ end
44
+
45
+ rdoc.rdoc_dir = 'rdoc'
46
+ rdoc.title = "activerecord-fast-import #{version}"
47
+ rdoc.rdoc_files.include('README*')
48
+ rdoc.rdoc_files.include('lib/**/*.rb')
49
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.3
@@ -0,0 +1,57 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{activerecord-fast-import}
8
+ s.version = "0.1.3"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Jan Suchal"]
12
+ s.date = %q{2009-09-09}
13
+ s.description = %q{Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE.}
14
+ s.email = %q{johno@jsmf.net}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.markdown"
18
+ ]
19
+ s.files = [
20
+ ".gitignore",
21
+ "LICENSE",
22
+ "README.markdown",
23
+ "Rakefile",
24
+ "VERSION",
25
+ "activerecord-fast-import.gemspec",
26
+ "lib/activerecord-fast-import.rb",
27
+ "nbproject/project.properties",
28
+ "nbproject/project.xml",
29
+ "spec/activerecord-fast-import_spec.rb",
30
+ "spec/spec_helper.rb"
31
+ ]
32
+ s.homepage = %q{http://github.com/jsuchal/activerecord-fast-import}
33
+ s.rdoc_options = ["--charset=UTF-8"]
34
+ s.require_paths = ["lib"]
35
+ s.rubygems_version = %q{1.3.5}
36
+ s.summary = %q{Fast MySQL import for ActiveRecord}
37
+ s.test_files = [
38
+ "spec/activerecord-fast-import_spec.rb",
39
+ "spec/spec_helper.rb"
40
+ ]
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_development_dependency(%q<rspec>, [">= 0"])
48
+ s.add_runtime_dependency(%q<activerecord>, [">= 2.1.2"])
49
+ else
50
+ s.add_dependency(%q<rspec>, [">= 0"])
51
+ s.add_dependency(%q<activerecord>, [">= 2.1.2"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<rspec>, [">= 0"])
55
+ s.add_dependency(%q<activerecord>, [">= 2.1.2"])
56
+ end
57
+ end
@@ -0,0 +1,54 @@
1
+ module ActiveRecord #:nodoc:
2
+ class Base
3
+ # Deletes all rows in table very fast, but without calling +destroy+ method
4
+ # nor any hooks.
5
+ def self.truncate_table
6
+ connection.execute("TRUNCATE TABLE #{quoted_table_name}")
7
+ end
8
+
9
+ # Disables key updates for model table
10
+ def self.disable_keys
11
+ connection.execute("ALTER TABLE #{quoted_table_name} DISABLE KEYS")
12
+ end
13
+
14
+ # Enables key updates for model table
15
+ def self.enable_keys
16
+ connection.execute("ALTER TABLE #{quoted_table_name} ENABLE KEYS")
17
+ end
18
+
19
+ # Loads data from file using MySQL native LOAD DATA INFILE query, disabling
20
+ # key updates for even faster import speed
21
+ #
22
+ # ==== Parameters
23
+ # * +file+ file(s) to import
24
+ # * +options+ (see <tt>load_data_infile</tt>)
25
+ def self.fast_import(files, options = {})
26
+ files = [files] unless files.is_a? Array
27
+ disable_keys
28
+ files.each {|file| load_data_infile(file, options)}
29
+ enable_keys
30
+ end
31
+
32
+ # Loads data from file using MySQL native LOAD DATA INFILE query
33
+ #
34
+ # ==== Parameters
35
+ # * +file+ the file to import
36
+ # * +options+
37
+ def self.load_data_infile(file, options = {})
38
+ sql = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{quoted_table_name} "
39
+ sql << "FIELDS TERMINATED BY '#{options[:fields_terminated_by]}' " if options[:fields_terminated_by]
40
+ sql << "LINES TERMINATED BY '#{options[:lines_terminated_by]}' " if options[:lines_terminated_by]
41
+ sql << "IGNORE #{options[:ignore_lines]} LINES " if options[:ignore_lines]
42
+ sql << "(" + options[:columns].join(', ') + ") " if options[:columns]
43
+ if options[:mapping]
44
+ mappings = []
45
+ options[:mapping].each_pair do |column, mapping|
46
+ mappings << "#{column} = #{mapping}"
47
+ end
48
+ sql << "SET #{mappings.join(', ')} " if mappings.size > 0
49
+ end
50
+ sql << ";"
51
+ connection.execute(sql)
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,6 @@
1
+ javac.classpath=
2
+ main.file=
3
+ platform.active=Ruby
4
+ source.encoding=UTF-8
5
+ src.lib.dir=lib
6
+ test.spec.dir=spec
@@ -0,0 +1,15 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project xmlns="http://www.netbeans.org/ns/project/1">
3
+ <type>org.netbeans.modules.ruby.rubyproject</type>
4
+ <configuration>
5
+ <data xmlns="http://www.netbeans.org/ns/ruby-project/1">
6
+ <name>activerecord-fast-import</name>
7
+ <source-roots>
8
+ <root id="src.lib.dir"/>
9
+ </source-roots>
10
+ <test-roots>
11
+ <root id="test.spec.dir"/>
12
+ </test-roots>
13
+ </data>
14
+ </configuration>
15
+ </project>
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "ActiverecordFastImport" do
4
+ it "fails" do
5
+ fail "hey buddy, you should probably rename this file and start specing for real"
6
+ end
7
+ end
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'activerecord-fast-import'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: activerecord-fast-import
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.3
5
+ platform: ruby
6
+ authors:
7
+ - Jan Suchal
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-09-09 00:00:00 +02:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: activerecord
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 2.1.2
34
+ version:
35
+ description: Native MySQL additions to ActiveRecord, like LOAD DATA INFILE, ENABLE/DISABLE KEYS, TRUNCATE TABLE.
36
+ email: johno@jsmf.net
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.markdown
44
+ files:
45
+ - .gitignore
46
+ - LICENSE
47
+ - README.markdown
48
+ - Rakefile
49
+ - VERSION
50
+ - activerecord-fast-import.gemspec
51
+ - lib/activerecord-fast-import.rb
52
+ - nbproject/project.properties
53
+ - nbproject/project.xml
54
+ - spec/activerecord-fast-import_spec.rb
55
+ - spec/spec_helper.rb
56
+ has_rdoc: true
57
+ homepage: http://github.com/jsuchal/activerecord-fast-import
58
+ licenses: []
59
+
60
+ post_install_message:
61
+ rdoc_options:
62
+ - --charset=UTF-8
63
+ require_paths:
64
+ - lib
65
+ required_ruby_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: "0"
70
+ version:
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: "0"
76
+ version:
77
+ requirements: []
78
+
79
+ rubyforge_project:
80
+ rubygems_version: 1.3.5
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: Fast MySQL import for ActiveRecord
84
+ test_files:
85
+ - spec/activerecord-fast-import_spec.rb
86
+ - spec/spec_helper.rb