usda-nutrient-database 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 55eaf87a81739b7b4da9d592c67eca5fcd857801
4
- data.tar.gz: e42ac18382b777910c4cbb6ca0546a48aaa3a48f
3
+ metadata.gz: 35f7dffdf433c3c075779ed74ffc85775784d873
4
+ data.tar.gz: 1290cbc6ab3f2abedf992dd1605e44132fe308a0
5
5
  SHA512:
6
- metadata.gz: 616a437099608f3b36179de590f12b81b97e1996e926a8d83b75118175508e51147db803560792a8b572f69252be0352b7c23e5e6f338fd30136c73009a330f3
7
- data.tar.gz: dedcb6ef862b1034fa15086f76e8059ff689a1c6f06f626392cc0790761541c22bdec009a922034e8dd184ccda6abb1b36117689242aa293fbdddac219448b85
6
+ metadata.gz: 0e2db9e3b9a216a9d0c3f1bebc8f0f9a642f1c9d7bbe0c970a70a2a6481159b14e465f39d1861966eed9e7420c909519e7b8b08e24f4b1fd87b70c2d2bb5f5be
7
+ data.tar.gz: f9319fd8b497e4df8b827d51bf2a69b0ffafb37b6e41ac24ac239dc540828830791bfc8edbd07b1fceeca610d91a826b053abd727c2541a98f9b4a7c67a7ac92
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.1.0
4
+
5
+ - Made it work with `activerecord-import` gem to speed up import times by 30x
6
+ [#16](https://github.com/mattbeedle/usda-nutrient-database/pull/16)
7
+
3
8
  ## 2.0.0
4
9
 
5
10
  - Removed IDs from tables which don't have them in USDA data. Instead opted to
data/README.md CHANGED
@@ -47,8 +47,43 @@ If you're using rails then copy the migrations across:
47
47
  rake usda_nutrient_database_engine:install:migrations
48
48
  ```
49
49
 
50
+ ## Configuration
51
+
52
+ ```ruby
53
+ UsdaNutrientDatabase.configure do |config|
54
+ config.batch_size = 20000 # import batch size, if using activerecord-import (default 10000)
55
+ config.perform_logging = true # default false
56
+ config.logger = Rails.logger # default Logger.new(STDOUT)
57
+ config.usda_version = 'sr25' # default sr28
58
+ end
59
+ ```
60
+
50
61
  ## Usage
51
62
 
63
+ ### Importing with UPSERT (takes around 2 minutes)
64
+
65
+ If you are running MySQL >= 5.6 or PostgreSQL >= 9.5 then you're in luck, you
66
+ can use UPSERT (insert or update) to speed up imports 30x. To do this you're
67
+ going to need to install
68
+ [activerecord-import](https://github.com/zdennis/activerecord-import).
69
+
70
+ ```ruby
71
+ require 'activerecord-import/base'
72
+ ActiveRecord::Import.require_adapter('postgresql')
73
+
74
+ # You may want to disable logging during this process to avoid dumping huge SQL
75
+ # strings into your logs
76
+ ActiveRecord::Base.logger = Logger.new('/dev/null')
77
+ ```
78
+
79
+ Now run the rake task which will import everything in around 2 minutes.
80
+ ```
81
+ rake usda:import
82
+ ```
83
+
84
+
85
+ ### Importing without UPSERT (takes 60+ minutes)
86
+
52
87
  Import the latest data with the import task:
53
88
  ```
54
89
  rake usda:import
@@ -60,6 +95,8 @@ tasks:
60
95
  rake -T usda
61
96
  ```
62
97
 
98
+ ### Models
99
+
63
100
  Use the models to query and profit:
64
101
  ```
65
102
  UsdaNutrientDatabase::FoodGroup
@@ -32,10 +32,18 @@ module UsdaNutrientDatabase
32
32
  end
33
33
  end
34
34
 
35
+ def activerecord_import?
36
+ defined?(ActiveRecord::Import)
37
+ end
38
+
35
39
  def configuration
36
40
  @configuration ||= UsdaNutrientDatabase::Configuration.new
37
41
  end
38
42
 
43
+ def batch_size
44
+ @batch_fize ||= configuration.batch_size
45
+ end
46
+
39
47
  def usda_version
40
48
  @usda_version ||= configuration.usda_version
41
49
  end
@@ -1,9 +1,14 @@
1
1
  module UsdaNutrientDatabase
2
2
  class Configuration
3
3
  attr_accessor :logger
4
- attr_writer :perform_logging,
4
+ attr_writer :batch_size,
5
+ :perform_logging,
5
6
  :usda_version
6
7
 
8
+ def batch_size
9
+ @batch_size ||= 10000
10
+ end
11
+
7
12
  def logger
8
13
  @logger ||= Logger.new(STDOUT)
9
14
  end
@@ -5,24 +5,35 @@ module UsdaNutrientDatabase
5
5
  class Base
6
6
  def initialize(directory)
7
7
  @directory = directory
8
+ @objects_to_import = []
8
9
  end
9
10
 
10
11
  def import
11
12
  log_import_started
12
13
  CSV.open(file_location, 'r:iso-8859-1:utf-8', csv_options) do |csv|
13
- csv.each { |row| extract_row(row) }
14
+ csv.each { |row| objects_to_import << extract_row(row) }
14
15
  end
16
+ ar_import? ? save_objects : objects_to_import.each(&:save)
15
17
  end
16
18
 
17
19
  private
18
20
 
19
- attr_reader :directory
21
+ attr_reader :directory,
22
+ :objects_to_import
23
+
24
+ def ar_import?
25
+ UsdaNutrientDatabase.activerecord_import?
26
+ end
20
27
 
21
28
  def extract_row(row)
22
- build_object(apply_typecasts(row)).save
29
+ build_object(apply_typecasts(row))
23
30
  end
24
31
 
25
32
  def build_object(row)
33
+ ar_import? ? build_values_array(row) : build_ar_object(row)
34
+ end
35
+
36
+ def build_ar_object(row)
26
37
  find_or_initialize(row).tap do |object|
27
38
  columns.each_with_index do |column, index|
28
39
  object.send("#{column}=", row[index])
@@ -30,6 +41,10 @@ module UsdaNutrientDatabase
30
41
  end
31
42
  end
32
43
 
44
+ def build_values_array(row)
45
+ columns.map.with_index { |_, index| row[index] }
46
+ end
47
+
33
48
  def columns
34
49
  raise NotImplementedError
35
50
  end
@@ -53,6 +68,10 @@ module UsdaNutrientDatabase
53
68
  def csv_options
54
69
  { col_sep: '^', quote_char: '~' }
55
70
  end
71
+
72
+ def save_objects
73
+ raise NotImplementedError
74
+ end
56
75
  end
57
76
  end
58
77
  end
@@ -19,6 +19,16 @@ module UsdaNutrientDatabase
19
19
  def log_import_started
20
20
  UsdaNutrientDatabase.log 'Importing food groups'
21
21
  end
22
+
23
+ def save_objects
24
+ UsdaNutrientDatabase::FoodGroup.import(columns, objects_to_import, {
25
+ validate: false,
26
+ on_duplicate_key_update: {
27
+ conflict_target: :code,
28
+ columns: %i(description)
29
+ }
30
+ })
31
+ end
22
32
  end
23
33
  end
24
34
  end
@@ -32,6 +32,16 @@ module UsdaNutrientDatabase
32
32
  :protein_factor, :fat_factor, :carbohydrate_factor
33
33
  ]
34
34
  end
35
+
36
+ def save_objects
37
+ UsdaNutrientDatabase::Food.import(columns, objects_to_import, {
38
+ validate: false,
39
+ on_duplicate_key_update: {
40
+ conflict_target: :nutrient_databank_number,
41
+ columns: columns
42
+ }
43
+ })
44
+ end
35
45
  end
36
46
  end
37
47
  end
@@ -4,6 +4,11 @@ module UsdaNutrientDatabase
4
4
 
5
5
  private
6
6
 
7
+ def apply_typecasts(row)
8
+ row[8] = row[8] == 'Y'
9
+ row
10
+ end
11
+
7
12
  def columns
8
13
  [
9
14
  :nutrient_databank_number, :nutrient_number, :nutrient_value,
@@ -15,8 +20,9 @@ module UsdaNutrientDatabase
15
20
  end
16
21
 
17
22
  def find_or_initialize(row)
18
- UsdaNutrientDatabase::FoodsNutrient.find_or_initialize_by(
19
- nutrient_databank_number: row[0], nutrient_number: row[1]
23
+ UsdaNutrientDatabase::FoodsNutrient.new(
24
+ nutrient_databank_number: row[0],
25
+ nutrient_number: row[3]
20
26
  )
21
27
  end
22
28
 
@@ -27,6 +33,26 @@ module UsdaNutrientDatabase
27
33
  def log_import_started
28
34
  UsdaNutrientDatabase.log 'Importing foods_nutrients'
29
35
  end
36
+
37
+ def save_objects
38
+ options = {
39
+ batch_size: UsdaNutrientDatabase.batch_size,
40
+ validate: false
41
+ }
42
+ if UsdaNutrientDatabase::FoodsNutrient.exists?
43
+ options.merge!(
44
+ on_duplicate_key_update: {
45
+ conflict_target: %i(nutrient_databank_number nutrient_number),
46
+ columns: columns
47
+ }
48
+ )
49
+ end
50
+ UsdaNutrientDatabase::FoodsNutrient.import(
51
+ columns,
52
+ objects_to_import,
53
+ options
54
+ )
55
+ end
30
56
  end
31
57
  end
32
58
  end
@@ -25,6 +25,16 @@ module UsdaNutrientDatabase
25
25
  def log_import_started
26
26
  UsdaNutrientDatabase.log 'Importing footnotes'
27
27
  end
28
+
29
+ def save_objects
30
+ UsdaNutrientDatabase::Footnote.import(columns, objects_to_import, {
31
+ validate: false,
32
+ on_duplicate_key_update: {
33
+ conflict_target: %i(nutrient_databank_number nutrient_number footnote_number),
34
+ columns: columns
35
+ }
36
+ })
37
+ end
28
38
  end
29
39
  end
30
40
  end
@@ -23,6 +23,16 @@ module UsdaNutrientDatabase
23
23
  def log_import_started
24
24
  UsdaNutrientDatabase.log 'Importing nutrients'
25
25
  end
26
+
27
+ def save_objects
28
+ UsdaNutrientDatabase::Nutrient.import(columns, objects_to_import, {
29
+ validate: false,
30
+ on_duplicate_key_update: {
31
+ conflict_target: :nutrient_number,
32
+ columns: columns
33
+ }
34
+ })
35
+ end
26
36
  end
27
37
  end
28
38
  end
@@ -13,12 +13,22 @@ module UsdaNutrientDatabase
13
13
  end
14
14
 
15
15
  def log_import_started
16
- UsdaNutrientDatabase.log 'Source code import started'
16
+ UsdaNutrientDatabase.log 'Import source codes'
17
17
  end
18
18
 
19
19
  def filename
20
20
  'SRC_CD.txt'
21
21
  end
22
+
23
+ def save_objects
24
+ UsdaNutrientDatabase::SourceCode.import(columns, objects_to_import, {
25
+ validate: false,
26
+ on_duplicate_key_update: {
27
+ conflict_target: :code,
28
+ columns: columns
29
+ }
30
+ })
31
+ end
22
32
  end
23
33
  end
24
34
  end
@@ -26,6 +26,16 @@ module UsdaNutrientDatabase
26
26
  def log_import_started
27
27
  UsdaNutrientDatabase.log 'Importing weights'
28
28
  end
29
+
30
+ def save_objects
31
+ UsdaNutrientDatabase::Weight.import(columns, objects_to_import, {
32
+ validate: false,
33
+ on_duplicate_key_update: {
34
+ conflict_target: %i(nutrient_databank_number sequence_number),
35
+ columns: columns
36
+ }
37
+ })
38
+ end
29
39
  end
30
40
  end
31
41
  end
@@ -1,3 +1,3 @@
1
1
  module UsdaNutrientDatabase
2
- VERSION = '2.0.0'
2
+ VERSION = '2.1.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: usda-nutrient-database
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Beedle