usda-nutrient-database 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +37 -0
- data/lib/usda-nutrient-database.rb +8 -0
- data/lib/usda_nutrient_database/configuration.rb +6 -1
- data/lib/usda_nutrient_database/import/base.rb +22 -3
- data/lib/usda_nutrient_database/import/food_groups.rb +10 -0
- data/lib/usda_nutrient_database/import/foods.rb +10 -0
- data/lib/usda_nutrient_database/import/foods_nutrients.rb +28 -2
- data/lib/usda_nutrient_database/import/footnotes.rb +10 -0
- data/lib/usda_nutrient_database/import/nutrients.rb +10 -0
- data/lib/usda_nutrient_database/import/source_codes.rb +11 -1
- data/lib/usda_nutrient_database/import/weights.rb +10 -0
- data/lib/usda_nutrient_database/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35f7dffdf433c3c075779ed74ffc85775784d873
|
4
|
+
data.tar.gz: 1290cbc6ab3f2abedf992dd1605e44132fe308a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e2db9e3b9a216a9d0c3f1bebc8f0f9a642f1c9d7bbe0c970a70a2a6481159b14e465f39d1861966eed9e7420c909519e7b8b08e24f4b1fd87b70c2d2bb5f5be
|
7
|
+
data.tar.gz: f9319fd8b497e4df8b827d51bf2a69b0ffafb37b6e41ac24ac239dc540828830791bfc8edbd07b1fceeca610d91a826b053abd727c2541a98f9b4a7c67a7ac92
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.1.0
|
4
|
+
|
5
|
+
- Made it work with `activerecord-import` gem to speed up import times by 30x
|
6
|
+
[#16](https://github.com/mattbeedle/usda-nutrient-database/pull/16)
|
7
|
+
|
3
8
|
## 2.0.0
|
4
9
|
|
5
10
|
- Removed IDs from tables which don't have them in USDA data. Instead opted to
|
data/README.md
CHANGED
@@ -47,8 +47,43 @@ If you're using rails then copy the migrations across:
|
|
47
47
|
rake usda_nutrient_database_engine:install:migrations
|
48
48
|
```
|
49
49
|
|
50
|
+
## Configuration
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
UsdaNutrientDatabase.configure do |config|
|
54
|
+
config.batch_size = 20000 # import batch size, if using activerecord-import
|
55
|
+
config.perform_logging = true # default false
|
56
|
+
config.logger = Rails.logger # default Logger.new(STDOUT)
|
57
|
+
config.usda_version = 'sr25' # default sr28
|
58
|
+
end
|
59
|
+
```
|
60
|
+
|
50
61
|
## Usage
|
51
62
|
|
63
|
+
### Importing with UPSERT (takes around 2 minutes)
|
64
|
+
|
65
|
+
If you are running MySQL >= 5.6 or PostgreSQL >= 9.5 then you're in luck, you
|
66
|
+
can use UPSERT (insert or update) to speed up imports 30x. To do this you're
|
67
|
+
going to need to install
|
68
|
+
[activerecord-import](https://github.com/zdennis/activerecord-import)
|
69
|
+
|
70
|
+
```ruby
|
71
|
+
require 'activerecord-import/base'
|
72
|
+
ActiveRecord::Import.require_adapter('postgresql')
|
73
|
+
|
74
|
+
# You may want to disable logging during this process to avoid dumping huge SQL
|
75
|
+
# strings into your logs
|
76
|
+
ActiveRecord::Base.logger = Logger.new('/dev/null')
|
77
|
+
```
|
78
|
+
|
79
|
+
Now run the rake task which will import everything in around 2 minutes.
|
80
|
+
```
|
81
|
+
rake usda:import
|
82
|
+
```
|
83
|
+
|
84
|
+
|
85
|
+
### Importing without UPSERT (takes 60+ minutes)
|
86
|
+
|
52
87
|
Import the latest data with the import task:
|
53
88
|
```
|
54
89
|
rake usda:import
|
@@ -60,6 +95,8 @@ tasks:
|
|
60
95
|
rake -T usda
|
61
96
|
```
|
62
97
|
|
98
|
+
### Models
|
99
|
+
|
63
100
|
Use the models to query and profit:
|
64
101
|
```
|
65
102
|
UsdaNutrientDatabase::FoodGroup
|
@@ -32,10 +32,18 @@ module UsdaNutrientDatabase
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
+
def activerecord_import?
|
36
|
+
defined?(ActiveRecord::Import)
|
37
|
+
end
|
38
|
+
|
35
39
|
def configuration
|
36
40
|
@configuration ||= UsdaNutrientDatabase::Configuration.new
|
37
41
|
end
|
38
42
|
|
43
|
+
def batch_size
|
44
|
+
@batch_fize ||= configuration.batch_size
|
45
|
+
end
|
46
|
+
|
39
47
|
def usda_version
|
40
48
|
@usda_version ||= configuration.usda_version
|
41
49
|
end
|
@@ -1,9 +1,14 @@
|
|
1
1
|
module UsdaNutrientDatabase
|
2
2
|
class Configuration
|
3
3
|
attr_accessor :logger
|
4
|
-
attr_writer
|
4
|
+
attr_writer :batch_size,
|
5
|
+
:perform_logging,
|
5
6
|
:usda_version
|
6
7
|
|
8
|
+
def batch_size
|
9
|
+
@batch_size ||= 10000
|
10
|
+
end
|
11
|
+
|
7
12
|
def logger
|
8
13
|
@logger ||= Logger.new(STDOUT)
|
9
14
|
end
|
@@ -5,24 +5,35 @@ module UsdaNutrientDatabase
|
|
5
5
|
class Base
|
6
6
|
def initialize(directory)
|
7
7
|
@directory = directory
|
8
|
+
@objects_to_import = []
|
8
9
|
end
|
9
10
|
|
10
11
|
def import
|
11
12
|
log_import_started
|
12
13
|
CSV.open(file_location, 'r:iso-8859-1:utf-8', csv_options) do |csv|
|
13
|
-
csv.each { |row| extract_row(row) }
|
14
|
+
csv.each { |row| objects_to_import << extract_row(row) }
|
14
15
|
end
|
16
|
+
ar_import? ? save_objects : objects_to_import.each(&:save)
|
15
17
|
end
|
16
18
|
|
17
19
|
private
|
18
20
|
|
19
|
-
attr_reader :directory
|
21
|
+
attr_reader :directory,
|
22
|
+
:objects_to_import
|
23
|
+
|
24
|
+
def ar_import?
|
25
|
+
UsdaNutrientDatabase.activerecord_import?
|
26
|
+
end
|
20
27
|
|
21
28
|
def extract_row(row)
|
22
|
-
build_object(apply_typecasts(row))
|
29
|
+
build_object(apply_typecasts(row))
|
23
30
|
end
|
24
31
|
|
25
32
|
def build_object(row)
|
33
|
+
ar_import? ? build_values_array(row) : build_ar_object(row)
|
34
|
+
end
|
35
|
+
|
36
|
+
def build_ar_object(row)
|
26
37
|
find_or_initialize(row).tap do |object|
|
27
38
|
columns.each_with_index do |column, index|
|
28
39
|
object.send("#{column}=", row[index])
|
@@ -30,6 +41,10 @@ module UsdaNutrientDatabase
|
|
30
41
|
end
|
31
42
|
end
|
32
43
|
|
44
|
+
def build_values_array(row)
|
45
|
+
columns.map.with_index { |_, index| row[index] }
|
46
|
+
end
|
47
|
+
|
33
48
|
def columns
|
34
49
|
raise NotImplementedError
|
35
50
|
end
|
@@ -53,6 +68,10 @@ module UsdaNutrientDatabase
|
|
53
68
|
def csv_options
|
54
69
|
{ col_sep: '^', quote_char: '~' }
|
55
70
|
end
|
71
|
+
|
72
|
+
def save_objects
|
73
|
+
raise NotImplementedError
|
74
|
+
end
|
56
75
|
end
|
57
76
|
end
|
58
77
|
end
|
@@ -19,6 +19,16 @@ module UsdaNutrientDatabase
|
|
19
19
|
def log_import_started
|
20
20
|
UsdaNutrientDatabase.log 'Importing food groups'
|
21
21
|
end
|
22
|
+
|
23
|
+
def save_objects
|
24
|
+
UsdaNutrientDatabase::FoodGroup.import(columns, objects_to_import, {
|
25
|
+
validate: false,
|
26
|
+
on_duplicate_key_update: {
|
27
|
+
conflict_target: :code,
|
28
|
+
columns: %i(description)
|
29
|
+
}
|
30
|
+
})
|
31
|
+
end
|
22
32
|
end
|
23
33
|
end
|
24
34
|
end
|
@@ -32,6 +32,16 @@ module UsdaNutrientDatabase
|
|
32
32
|
:protein_factor, :fat_factor, :carbohydrate_factor
|
33
33
|
]
|
34
34
|
end
|
35
|
+
|
36
|
+
def save_objects
|
37
|
+
UsdaNutrientDatabase::Food.import(columns, objects_to_import, {
|
38
|
+
validate: false,
|
39
|
+
on_duplicate_key_update: {
|
40
|
+
conflict_target: :nutrient_databank_number,
|
41
|
+
columns: columns
|
42
|
+
}
|
43
|
+
})
|
44
|
+
end
|
35
45
|
end
|
36
46
|
end
|
37
47
|
end
|
@@ -4,6 +4,11 @@ module UsdaNutrientDatabase
|
|
4
4
|
|
5
5
|
private
|
6
6
|
|
7
|
+
def apply_typecasts(row)
|
8
|
+
row[8] = row[8] == 'Y'
|
9
|
+
row
|
10
|
+
end
|
11
|
+
|
7
12
|
def columns
|
8
13
|
[
|
9
14
|
:nutrient_databank_number, :nutrient_number, :nutrient_value,
|
@@ -15,8 +20,9 @@ module UsdaNutrientDatabase
|
|
15
20
|
end
|
16
21
|
|
17
22
|
def find_or_initialize(row)
|
18
|
-
UsdaNutrientDatabase::FoodsNutrient.
|
19
|
-
nutrient_databank_number: row[0],
|
23
|
+
UsdaNutrientDatabase::FoodsNutrient.new(
|
24
|
+
nutrient_databank_number: row[0],
|
25
|
+
nutrient_number: row[3]
|
20
26
|
)
|
21
27
|
end
|
22
28
|
|
@@ -27,6 +33,26 @@ module UsdaNutrientDatabase
|
|
27
33
|
def log_import_started
|
28
34
|
UsdaNutrientDatabase.log 'Importing foods_nutrients'
|
29
35
|
end
|
36
|
+
|
37
|
+
def save_objects
|
38
|
+
options = {
|
39
|
+
batch_size: UsdaNutrientDatabase.batch_size,
|
40
|
+
validate: false
|
41
|
+
}
|
42
|
+
if UsdaNutrientDatabase::FoodsNutrient.exists?
|
43
|
+
options.merge!(
|
44
|
+
on_duplicate_key_update: {
|
45
|
+
conflict_target: %i(nutrient_databank_number nutrient_number),
|
46
|
+
columns: columns
|
47
|
+
}
|
48
|
+
)
|
49
|
+
end
|
50
|
+
UsdaNutrientDatabase::FoodsNutrient.import(
|
51
|
+
columns,
|
52
|
+
objects_to_import,
|
53
|
+
options
|
54
|
+
)
|
55
|
+
end
|
30
56
|
end
|
31
57
|
end
|
32
58
|
end
|
@@ -25,6 +25,16 @@ module UsdaNutrientDatabase
|
|
25
25
|
def log_import_started
|
26
26
|
UsdaNutrientDatabase.log 'Importing footnotes'
|
27
27
|
end
|
28
|
+
|
29
|
+
def save_objects
|
30
|
+
UsdaNutrientDatabase::Footnote.import(columns, objects_to_import, {
|
31
|
+
validate: false,
|
32
|
+
on_duplicate_key_update: {
|
33
|
+
conflict_target: %i(nutrient_databank_number nutrient_number footnote_number),
|
34
|
+
columns: columns
|
35
|
+
}
|
36
|
+
})
|
37
|
+
end
|
28
38
|
end
|
29
39
|
end
|
30
40
|
end
|
@@ -23,6 +23,16 @@ module UsdaNutrientDatabase
|
|
23
23
|
def log_import_started
|
24
24
|
UsdaNutrientDatabase.log 'Importing nutrients'
|
25
25
|
end
|
26
|
+
|
27
|
+
def save_objects
|
28
|
+
UsdaNutrientDatabase::Nutrient.import(columns, objects_to_import, {
|
29
|
+
validate: false,
|
30
|
+
on_duplicate_key_update: {
|
31
|
+
conflict_target: :nutrient_number,
|
32
|
+
columns: columns
|
33
|
+
}
|
34
|
+
})
|
35
|
+
end
|
26
36
|
end
|
27
37
|
end
|
28
38
|
end
|
@@ -13,12 +13,22 @@ module UsdaNutrientDatabase
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def log_import_started
|
16
|
-
UsdaNutrientDatabase.log '
|
16
|
+
UsdaNutrientDatabase.log 'Import source codes'
|
17
17
|
end
|
18
18
|
|
19
19
|
def filename
|
20
20
|
'SRC_CD.txt'
|
21
21
|
end
|
22
|
+
|
23
|
+
def save_objects
|
24
|
+
UsdaNutrientDatabase::SourceCode.import(columns, objects_to_import, {
|
25
|
+
validate: false,
|
26
|
+
on_duplicate_key_update: {
|
27
|
+
conflict_target: :code,
|
28
|
+
columns: columns
|
29
|
+
}
|
30
|
+
})
|
31
|
+
end
|
22
32
|
end
|
23
33
|
end
|
24
34
|
end
|
@@ -26,6 +26,16 @@ module UsdaNutrientDatabase
|
|
26
26
|
def log_import_started
|
27
27
|
UsdaNutrientDatabase.log 'Importing weights'
|
28
28
|
end
|
29
|
+
|
30
|
+
def save_objects
|
31
|
+
UsdaNutrientDatabase::Weight.import(columns, objects_to_import, {
|
32
|
+
validate: false,
|
33
|
+
on_duplicate_key_update: {
|
34
|
+
conflict_target: %i(nutrient_databank_number sequence_number),
|
35
|
+
columns: columns
|
36
|
+
}
|
37
|
+
})
|
38
|
+
end
|
29
39
|
end
|
30
40
|
end
|
31
41
|
end
|