jsuchal-activerecord-fast-import 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown ADDED
@@ -0,0 +1,66 @@
1
+ # activerecord-fast-import
2
+
3
+ Loads data from text files into tables using the fast native MySQL [LOAD DATA INFILE](http://dev.mysql.com/doc/refman/5.1/en/load-data.html) query.
4
+
5
+ ## Examples
6
+
7
+ ### Loading data from tab delimited log file
8
+
9
+ Suppose you have an ActiveRecord model LogEntry defined as `LogEntry happened_at:datetime url:string` and a log file with tab delimited columns like this:
10
+
11
+ 2009-09-30 12:32:43<tab>http://github.com/
12
+ 2009-09-30 13:36:13<tab>http://facebook.com/
13
+
14
+ To import data from this log file, you have to use
15
+
16
+ LogEntry.fast_import('huge.log')
17
+
18
+ That's it!
19
+
20
+ Of course, in the real world you will also need more advanced features. Read on...
21
+
22
+
23
+ ### Changing delimiters and ignoring some rows
24
+
25
+ Of course, not all log files are delimited by tabs and newlines. Just pass custom delimiters in the options. If you want to ignore the first 10 lines, use `:ignore_lines`. Then pass the options hash as the second argument to `fast_import`.
26
+
27
+ import_options = {
28
+ :fields_terminated_by => ',',
29
+ :lines_terminated_by => ';',
30
+ :ignore_lines => 10
31
+ }
32
+
33
+ ### Changing order of columns and ignoring columns
34
+
35
+ Now, imagine you want to import data from a huge log file with the following format:
36
+
37
+ http://github.com/<tab>Mozilla<tab>2009-09-30 12:32:43
38
+ http://facebook.com/<tab>Opera<tab>2009-09-30 13:36:13
39
+
40
+ It is clear that the columns are in a different order, and we even want to ignore the second column. Let's do it:
41
+
42
+ import_options = {:columns => ["url", "@dummy", "happened_at"]}
43
+ LogEntry.fast_import('huge.log', import_options)
44
+
45
+ The special `@dummy` entry loads that column into a MySQL user variable; if the variable is not used in a transformation, the column is simply ignored.
46
+
47
+ ### Transforming data
48
+
49
+ Now imagine we have a log file like this:
50
+
51
+ 2009-09-30 12:32:43<tab>http://github.com/<tab>image.jpg
52
+ 2009-09-30 13:36:13<tab>http://facebook.com/<tab>styles/default.css
53
+
54
+ We want to concatenate the last two columns (the domain and the file path) into a single `url` column.
55
+
56
+ import_options = {
57
+ :columns => ["happened_at", "@domain", "@file"],
58
+ :mapping => { :url => "CONCAT(@domain, @file)" }
59
+ }
60
+ LogEntry.fast_import('huge.log', import_options)
61
+
62
+ Of course you can use any of those shiny [MySQL functions](http://dev.mysql.com/doc/refman/5.1/en/functions.html).
63
+
64
+ ## Copyright
65
+
66
+ Copyright (c) 2009 Jan Suchal. See LICENSE for details.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{activerecord-fast-import}
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jan Suchal"]
@@ -14,13 +14,13 @@ Gem::Specification.new do |s|
14
14
  s.email = %q{johno@jsmf.net}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE",
17
- "README.rdoc"
17
+ "README.markdown"
18
18
  ]
19
19
  s.files = [
20
20
  ".document",
21
21
  ".gitignore",
22
22
  "LICENSE",
23
- "README.rdoc",
23
+ "README.markdown",
24
24
  "Rakefile",
25
25
  "VERSION",
26
26
  "activerecord-fast-import.gemspec",
@@ -1,53 +1,53 @@
1
- module ActiveRecord
2
- class Base
3
- # Deletes all rows in table very fast, but without calling +destroy+ method
4
- # nor any hooks.
5
- def self.truncate_table
6
- connection.execute("TRUNCATE TABLE #{quoted_table_name}")
7
- end
8
-
9
- # Disables key updates for model table
10
- def self.disable_keys
11
- connection.execute("ALTER TABLE #{quoted_table_name} DISABLE KEYS")
12
- end
13
-
14
- # Enables key updates for model table
15
- def self.enable_keys
16
- connection.execute("ALTER TABLE #{quoted_table_name} ENABLE KEYS")
17
- end
18
-
19
- # Loads data from file using MySQL native LOAD DATA INFILE query, disabling
20
- # key updates for even faster import speed
21
- #
22
- # ==== Parameters
23
- # * +file+ the file to load
24
- # * +options+ (see <tt>load_data_infile?</tt>)
25
- def self.fast_import(file, options = {})
26
- disable_keys
27
- load_data_infile(file, options)
28
- enable_keys
29
- end
30
-
31
- # Loads data from file using MySQL native LOAD DATA INFILE query
32
- #
33
- # ==== Parameters
34
- # * +file+ the file to import
35
- # * +options+
36
- def self.load_data_infile(file, options = {})
37
- sql = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{quoted_table_name} "
38
- sql << "FIELDS TERMINATED BY '#{options[:fields_terminated_by]}' " if options[:fields_terminated_by]
39
- sql << "LINES TERMINATED BY '#{options[:lines_terminated_by]}' " if options[:lines_terminated_by]
40
- sql << "IGNORE #{options[:ignore_lines]} LINES " if options[:ignore_lines]
41
- sql << "(" + options[:columns].join(', ') + ") " if options[:columns]
42
- if options[:mapping]
43
- mappings = []
44
- options[:mapping].each_pair do |column, mapping|
45
- mappings << "#{column} = #{mapping}"
46
- end
47
- sql << "SET #{mappings.join(', ')} " if mappings.size > 0
48
- end
49
- sql << ";"
50
- connection.execute(sql)
51
- end
52
- end
1
+ module ActiveRecord #:nodoc:
2
+ class Base
3
+ # Deletes all rows in table very fast, but without calling +destroy+ method
4
+ # nor any hooks.
5
+ def self.truncate_table
6
+ connection.execute("TRUNCATE TABLE #{quoted_table_name}")
7
+ end
8
+
9
+ # Disables key updates for model table
10
+ def self.disable_keys
11
+ connection.execute("ALTER TABLE #{quoted_table_name} DISABLE KEYS")
12
+ end
13
+
14
+ # Enables key updates for model table
15
+ def self.enable_keys
16
+ connection.execute("ALTER TABLE #{quoted_table_name} ENABLE KEYS")
17
+ end
18
+
19
+ # Loads data from file using MySQL native LOAD DATA INFILE query, disabling
20
+ # key updates for even faster import speed
21
+ #
22
+ # ==== Parameters
23
+ # * +file+ the file to load
24
+ # * +options+ (see <tt>load_data_infile</tt>)
25
+ def self.fast_import(file, options = {})
26
+ disable_keys
27
+ load_data_infile(file, options)
28
+ enable_keys
29
+ end
30
+
31
+ # Loads data from file using MySQL native LOAD DATA INFILE query
32
+ #
33
+ # ==== Parameters
34
+ # * +file+ the file to import
35
+ # * +options+
36
+ def self.load_data_infile(file, options = {})
37
+ sql = "LOAD DATA LOCAL INFILE '#{file}' INTO TABLE #{quoted_table_name} "
38
+ sql << "FIELDS TERMINATED BY '#{options[:fields_terminated_by]}' " if options[:fields_terminated_by]
39
+ sql << "LINES TERMINATED BY '#{options[:lines_terminated_by]}' " if options[:lines_terminated_by]
40
+ sql << "IGNORE #{options[:ignore_lines]} LINES " if options[:ignore_lines]
41
+ sql << "(" + options[:columns].join(', ') + ") " if options[:columns]
42
+ if options[:mapping]
43
+ mappings = []
44
+ options[:mapping].each_pair do |column, mapping|
45
+ mappings << "#{column} = #{mapping}"
46
+ end
47
+ sql << "SET #{mappings.join(', ')} " if mappings.size > 0
48
+ end
49
+ sql << ";"
50
+ connection.execute(sql)
51
+ end
52
+ end
53
53
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jsuchal-activerecord-fast-import
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jan Suchal
@@ -40,12 +40,12 @@ extensions: []
40
40
 
41
41
  extra_rdoc_files:
42
42
  - LICENSE
43
- - README.rdoc
43
+ - README.markdown
44
44
  files:
45
45
  - .document
46
46
  - .gitignore
47
47
  - LICENSE
48
- - README.rdoc
48
+ - README.markdown
49
49
  - Rakefile
50
50
  - VERSION
51
51
  - activerecord-fast-import.gemspec
data/README.rdoc DELETED
@@ -1,5 +0,0 @@
1
- = activerecord-fast-import
2
-
3
- == Copyright
4
-
5
- Copyright (c) 2009 Jan Suchal. See LICENSE for details.