datashift 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.document +5 -5
  2. data/LICENSE.txt +26 -26
  3. data/README.markdown +305 -303
  4. data/README.rdoc +19 -19
  5. data/Rakefile +93 -93
  6. data/VERSION +1 -1
  7. data/datashift-0.1.0.gem +0 -0
  8. data/datashift.gemspec +152 -136
  9. data/lib/applications/jruby/jexcel_file.rb +408 -408
  10. data/lib/applications/jruby/word.rb +79 -79
  11. data/lib/datashift.rb +152 -152
  12. data/lib/datashift/exceptions.rb +11 -11
  13. data/lib/datashift/file_definitions.rb +353 -353
  14. data/lib/datashift/mapping_file_definitions.rb +87 -87
  15. data/lib/datashift/method_detail.rb +275 -275
  16. data/lib/datashift/method_dictionary.rb +209 -209
  17. data/lib/datashift/method_mapper.rb +90 -90
  18. data/lib/generators/csv_generator.rb +36 -36
  19. data/lib/generators/excel_generator.rb +122 -122
  20. data/lib/generators/generator_base.rb +13 -13
  21. data/lib/helpers/core_ext/to_b.rb +24 -24
  22. data/lib/helpers/spree_helper.rb +153 -155
  23. data/lib/java/poi-3.7/LICENSE +507 -507
  24. data/lib/java/poi-3.7/NOTICE +21 -21
  25. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  26. data/lib/loaders/csv_loader.rb +98 -98
  27. data/lib/loaders/excel_loader.rb +155 -155
  28. data/lib/loaders/loader_base.rb +420 -420
  29. data/lib/loaders/spreadsheet_loader.rb +136 -136
  30. data/lib/loaders/spree/image_loader.rb +63 -64
  31. data/lib/loaders/spree/product_loader.rb +248 -250
  32. data/public/spree/products/large/DEMO_001_ror_bag.jpeg +0 -0
  33. data/public/spree/products/large/DEMO_002_Powerstation.jpg +0 -0
  34. data/public/spree/products/large/DEMO_003_ror_mug.jpeg +0 -0
  35. data/public/spree/products/mini/DEMO_001_ror_bag.jpeg +0 -0
  36. data/public/spree/products/mini/DEMO_002_Powerstation.jpg +0 -0
  37. data/public/spree/products/mini/DEMO_003_ror_mug.jpeg +0 -0
  38. data/public/spree/products/original/DEMO_001_ror_bag.jpeg +0 -0
  39. data/public/spree/products/original/DEMO_002_Powerstation.jpg +0 -0
  40. data/public/spree/products/original/DEMO_003_ror_mug.jpeg +0 -0
  41. data/public/spree/products/product/DEMO_001_ror_bag.jpeg +0 -0
  42. data/public/spree/products/product/DEMO_002_Powerstation.jpg +0 -0
  43. data/public/spree/products/product/DEMO_003_ror_mug.jpeg +0 -0
  44. data/public/spree/products/small/DEMO_001_ror_bag.jpeg +0 -0
  45. data/public/spree/products/small/DEMO_002_Powerstation.jpg +0 -0
  46. data/public/spree/products/small/DEMO_003_ror_mug.jpeg +0 -0
  47. data/spec/csv_loader_spec.rb +30 -30
  48. data/spec/datashift_spec.rb +26 -26
  49. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  50. data/spec/excel_exporter_spec.rb +78 -78
  51. data/spec/excel_generator_spec.rb +78 -78
  52. data/spec/excel_loader_spec.rb +223 -223
  53. data/spec/file_definitions.rb +141 -141
  54. data/spec/fixtures/ProjectsDefaults.yml +29 -29
  55. data/spec/fixtures/config/database.yml +27 -24
  56. data/spec/fixtures/datashift_Spree_db.sqlite +0 -0
  57. data/spec/fixtures/interact_models_db.sqlite +0 -0
  58. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  59. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  60. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  61. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  62. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  63. data/spec/fixtures/spree/SpreeProductsWithImages.csv +4 -0
  64. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  65. data/spec/fixtures/test_model_defs.rb +57 -57
  66. data/spec/loader_spec.rb +120 -120
  67. data/spec/method_dictionary_spec.rb +242 -242
  68. data/spec/method_mapper_spec.rb +41 -41
  69. data/spec/spec_helper.rb +116 -116
  70. data/spec/spree_generator_spec.rb +64 -64
  71. data/spec/spree_loader_spec.rb +324 -327
  72. data/spec/spree_method_mapping_spec.rb +214 -214
  73. data/tasks/config/seed_fu_product_template.erb +15 -15
  74. data/tasks/config/tidy_config.txt +12 -12
  75. data/tasks/db_tasks.rake +65 -65
  76. data/tasks/excel_generator.rake +78 -78
  77. data/tasks/file_tasks.rake +36 -36
  78. data/tasks/import/csv.rake +49 -49
  79. data/tasks/import/excel.rake +71 -71
  80. data/tasks/spree/image_load.rake +108 -108
  81. data/tasks/spree/product_loader.rake +43 -43
  82. data/tasks/word_to_seedfu.rake +166 -166
  83. data/test/helper.rb +18 -18
  84. data/test/test_interact.rb +7 -7
  85. metadata +22 -3
  86. data/spec/fixtures/interact_spree_db.sqlite +0 -0
@@ -1,72 +1,72 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Feb 2011
4
- # License:: TBD. Free, Open Source. MIT ?
5
- #
6
- # REQUIRES: JRuby
7
- #
8
- # Usage::
9
- #
10
- # In Rakefile:
11
- #
12
- # require 'datashift'
13
- #
14
- # DataShift::load_tasks
15
- #
16
- # Cmd Line:
17
- #
18
- # => jruby -S rake datashift:import:excel model=<active record class> input=<file.xls>
19
- # => jruby -S rake datashift:import:excel model=<active record class> input=C:\MyProducts.xlsverbose=true
20
- #
21
- require 'datashift'
22
- require 'excel_loader'
23
-
24
- namespace :datashift do
25
-
26
- namespace :import do
27
-
28
- desc "Populate model's table with data from .xls (Excel) file"
29
-
30
- task :excel, [:model, :loader, :input, :config, :verbose] => [:environment] do |t, args|
31
-
32
- # in familiar ruby style args seems to have been become empty using this new style for rake 0.9.2
33
- # whatever format i try, on both Win and OSX .. so had to revert back to ENV
34
- model = ENV['model']
35
- input = ENV['input']
36
-
37
- raise "USAGE: jruby -S rake datashift:import:excel input=excel_file.xls model=<Class>" unless(input)
38
- raise "ERROR: Cannot process without AR Model - please supply model=<Class>" unless(model)
39
- raise "ERROR: Could not find file #{args[:input]}" unless File.exists?(input)
40
-
41
- begin
42
- klass = Kernel.const_get(model)
43
- rescue NameError
44
- raise "ERROR: No such AR Model found - check valid model supplied via model=<Class>"
45
- end
46
-
47
- if(ENV['loader'])
48
- begin
49
- loader_klass = Kernel.const_get(ENV['loader'])
50
-
51
- loader = loader_klass.new(klass)
52
-
53
- puts "INFO: Using loader : #{loader.class}"
54
- rescue
55
- puts "INFO: No specific #{model}Loader found - using generic ExcelLoader"
56
- loader = DataShift::ExcelLoader.new(klass)
57
- end
58
- else
59
- puts "INFO: No Loader specified - using generic ExcelLoader"
60
- loader = DataShift::ExcelLoader.new(klass)
61
- end
62
-
63
- puts "ARGS #{args.inspect} #{args[:verbose]} [#{args.verbose}]"
64
- loader.logger.verbose if(ENV['verbose'])
65
-
66
- loader.configure_from( ENV['config'] ) if(ENV['config'])
67
-
68
- loader.perform_load(input)
69
- end
70
-
71
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Feb 2011
4
+ # License:: TBD. Free, Open Source. MIT ?
5
+ #
6
+ # REQUIRES: JRuby
7
+ #
8
+ # Usage::
9
+ #
10
+ # In Rakefile:
11
+ #
12
+ # require 'datashift'
13
+ #
14
+ # DataShift::load_tasks
15
+ #
16
+ # Cmd Line:
17
+ #
18
+ # => jruby -S rake datashift:import:excel model=<active record class> input=<file.xls>
19
+ # => jruby -S rake datashift:import:excel model=<active record class> input=C:\MyProducts.xls verbose=true
20
+ #
21
+ require 'datashift'
22
+ require 'excel_loader'
23
+
24
+ namespace :datashift do
25
+
26
+ namespace :import do
27
+
28
+ desc "Populate model's table with data from .xls (Excel) file"
29
+
30
+ task :excel, [:model, :loader, :input, :config, :verbose] => [:environment] do |t, args|
31
+
32
+ # in familiar ruby style args seems to have become empty using this new style for rake 0.9.2
33
+ # whatever format i try, on both Win and OSX .. so had to revert back to ENV
34
+ model = ENV['model']
35
+ input = ENV['input']
36
+
37
+ raise "USAGE: jruby -S rake datashift:import:excel input=excel_file.xls model=<Class>" unless(input)
38
+ raise "ERROR: Cannot process without AR Model - please supply model=<Class>" unless(model)
39
+ raise "ERROR: Could not find file #{args[:input]}" unless File.exists?(input)
40
+
41
+ begin
42
+ klass = Kernel.const_get(model)
43
+ rescue NameError
44
+ raise "ERROR: No such AR Model found - check valid model supplied via model=<Class>"
45
+ end
46
+
47
+ if(ENV['loader'])
48
+ begin
49
+ loader_klass = Kernel.const_get(ENV['loader'])
50
+
51
+ loader = loader_klass.new(klass)
52
+
53
+ puts "INFO: Using loader : #{loader.class}"
54
+ rescue
55
+ puts "INFO: No specific #{model}Loader found - using generic ExcelLoader"
56
+ loader = DataShift::ExcelLoader.new(klass)
57
+ end
58
+ else
59
+ puts "INFO: No Loader specified - using generic ExcelLoader"
60
+ loader = DataShift::ExcelLoader.new(klass)
61
+ end
62
+
63
+ puts "ARGS #{args.inspect} #{args[:verbose]} [#{args.verbose}]"
64
+ loader.logger.verbose if(ENV['verbose'])
65
+
66
+ loader.configure_from( ENV['config'] ) if(ENV['config'])
67
+
68
+ loader.perform_load(input)
69
+ end
70
+
71
+ end
72
72
  end
@@ -1,109 +1,109 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Feb 2011
4
- # License:: MIT
5
- #
6
- # Usage::
7
- #
8
- # => rake datashift:spree:images input=vendor/extensions/site/fixtures/images
9
- # => rake datashift:spree:images input=C:\images\photos large dummy=true
10
- #
11
- # => rake datashift:spree:images input=C:\images\taxon_icons skip_if_no_assoc=true klass=Taxon
12
- #
13
- namespace :datashift do
14
-
15
- namespace :spree do
16
-
17
- desc "Populate the DB with images.\nDefault location db/image_seeds, or specify :input=<path> or dir under db/image_seeds with :folder"
18
- # :dummy => dummy run without actual saving to DB
19
- task :images, [:input, :folder, :dummy, :sku, :skip_if_no_assoc, :skip_if_loaded, :model] => :environment do |t, args|
20
-
21
- require 'image_loader'
22
-
23
- raise "USAGE: Please specify one of :input or :folder" if(args[:input] && args[:folder])
24
- puts "SKU not specified " if(args[:input] && args[:folder])
25
-
26
- if args[:input]
27
- @image_cache = args[:input]
28
- else
29
- @image_cache = File.join(Rails.root, "db", "image_seeds")
30
- @image_cache = File.join(@image_cache, args[:folder]) if(args[:folder])
31
- end
32
-
33
- attachment_klazz = Product
34
-
35
- begin
36
- attachment_klazz = Kernel.const_get(args[:model]) if(args[:model])
37
- rescue NameError
38
- attachment_klazz = Product
39
- end
40
-
41
- image_loader = ImageLoader.new
42
-
43
- if(File.exists? @image_cache )
44
- puts "Loading images from #{@image_cache}"
45
-
46
- missing_records = []
47
- Dir.glob("#{@image_cache}/*.{jpg,png,gif}") do |image_name|
48
-
49
- puts "Processing #{image_name} : #{File.exists?(image_name)}"
50
- base_name = File.basename(image_name, '.*')
51
-
52
- record = nil
53
- if(attachment_klazz == Product && args[:sku])
54
- sku = base_name.slice!(/\w+/)
55
- sku.strip!
56
- base_name.strip!
57
-
58
- puts "Looking fo SKU #{sku}"
59
- record = Variant.find_by_sku(sku)
60
- if record
61
- record = record.product # SKU stored on Variant but we want it's master Product
62
- else
63
- puts "Looking for NAME [#{base_name}]"
64
- record = attachment_klazz.find_by_name(base_name)
65
- end
66
- else
67
- puts "Looking for #{attachment_klazz.name} with NAME [#{base_name}]"
68
- record = attachment_klazz.find_by_name(base_name)
69
- end
70
-
71
- if(record)
72
- puts "Found record for attachment : #{record.inspect}"
73
- exists = record.images.detect {|i| puts "COMPARE #{i.attachment_file_name} => #{image_name}"; i.attachment_file_name == image_name }
74
- puts "Found existing attachments [#{exists}]" unless(exists.nil?)
75
- if(args[:skip_if_loaded] && !exists.nil?)
76
- puts "Skipping - Image #{image_name} already loaded for #{attachment_klazz}"
77
- next
78
- end
79
- else
80
- missing_records << image_name
81
- end
82
-
83
- # Now do actual upload to DB unless we are doing a dummy run,
84
- # or the Image must have an associated record
85
- unless(args[:dummy] == 'true' || (args[:skip_if_no_assoc] && record.nil?))
86
- image_loader.reset()
87
- puts "Process Image"
88
- image_loader.process( image_name, record )
89
- end
90
-
91
- end
92
-
93
- unless missing_records.empty?
94
- FileUtils.mkdir_p('MissingRecords') unless File.directory?('MissingRecords')
95
-
96
- puts '\nMISSING Records Report>>'
97
- missing_records.each do |i|
98
- puts "Copy #{i} to MissingRecords folder"
99
- FileUtils.cp( i, 'MissingRecords') unless(args[:dummy] == 'true')
100
- end
101
- end
102
-
103
- else
104
- puts "ERROR: Supplied Path #{@image_cache} not accesible"
105
- exit(-1)
106
- end
107
- end
108
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Feb 2011
4
+ # License:: MIT
5
+ #
6
+ # Usage::
7
+ #
8
+ # => rake datashift:spree:images input=vendor/extensions/site/fixtures/images
9
+ # => rake datashift:spree:images input=C:\images\photos large dummy=true
10
+ #
11
+ # => rake datashift:spree:images input=C:\images\taxon_icons skip_if_no_assoc=true model=Taxon
12
+ #
13
+ namespace :datashift do
14
+
15
+ namespace :spree do
16
+
17
+ desc "Populate the DB with images.\nDefault location db/image_seeds, or specify :input=<path> or dir under db/image_seeds with :folder"
18
+ # :dummy => dummy run without actual saving to DB
19
+ task :images, [:input, :folder, :dummy, :sku, :skip_if_no_assoc, :skip_if_loaded, :model] => :environment do |t, args|
20
+
21
+ require 'image_loader'
22
+
23
+ raise "USAGE: Please specify one of :input or :folder" if(args[:input] && args[:folder])
24
+ puts "SKU not specified " if(args[:input] && args[:folder])
25
+
26
+ if args[:input]
27
+ @image_cache = args[:input]
28
+ else
29
+ @image_cache = File.join(Rails.root, "db", "image_seeds")
30
+ @image_cache = File.join(@image_cache, args[:folder]) if(args[:folder])
31
+ end
32
+
33
+ attachment_klazz = Product
34
+
35
+ begin
36
+ attachment_klazz = Kernel.const_get(args[:model]) if(args[:model])
37
+ rescue NameError
38
+ attachment_klazz = Product
39
+ end
40
+
41
+ image_loader = ImageLoader.new
42
+
43
+ if(File.exists? @image_cache )
44
+ puts "Loading images from #{@image_cache}"
45
+
46
+ missing_records = []
47
+ Dir.glob("#{@image_cache}/*.{jpg,png,gif}") do |image_name|
48
+
49
+ puts "Processing #{image_name} : #{File.exists?(image_name)}"
50
+ base_name = File.basename(image_name, '.*')
51
+
52
+ record = nil
53
+ if(attachment_klazz == Product && args[:sku])
54
+ sku = base_name.slice!(/\w+/)
55
+ sku.strip!
56
+ base_name.strip!
57
+
58
+ puts "Looking fo SKU #{sku}"
59
+ record = Variant.find_by_sku(sku)
60
+ if record
61
+ record = record.product # SKU stored on Variant but we want it's master Product
62
+ else
63
+ puts "Looking for NAME [#{base_name}]"
64
+ record = attachment_klazz.find_by_name(base_name)
65
+ end
66
+ else
67
+ puts "Looking for #{attachment_klazz.name} with NAME [#{base_name}]"
68
+ record = attachment_klazz.find_by_name(base_name)
69
+ end
70
+
71
+ if(record)
72
+ puts "Found record for attachment : #{record.inspect}"
73
+ exists = record.images.detect {|i| puts "COMPARE #{i.attachment_file_name} => #{image_name}"; i.attachment_file_name == image_name }
74
+ puts "Found existing attachments [#{exists}]" unless(exists.nil?)
75
+ if(args[:skip_if_loaded] && !exists.nil?)
76
+ puts "Skipping - Image #{image_name} already loaded for #{attachment_klazz}"
77
+ next
78
+ end
79
+ else
80
+ missing_records << image_name
81
+ end
82
+
83
+ # Now do actual upload to DB unless we are doing a dummy run,
84
+ # or skip_if_no_assoc is set and no associated record was found for the image
85
+ unless(args[:dummy] == 'true' || (args[:skip_if_no_assoc] && record.nil?))
86
+ image_loader.reset()
87
+ puts "Process Image"
88
+ image_loader.process( image_name, record )
89
+ end
90
+
91
+ end
92
+
93
+ unless missing_records.empty?
94
+ FileUtils.mkdir_p('MissingRecords') unless File.directory?('MissingRecords')
95
+
96
+ puts '\nMISSING Records Report>>'
97
+ missing_records.each do |i|
98
+ puts "Copy #{i} to MissingRecords folder"
99
+ FileUtils.cp( i, 'MissingRecords') unless(args[:dummy] == 'true')
100
+ end
101
+ end
102
+
103
+ else
104
+ puts "ERROR: Supplied Path #{@image_cache} not accesible"
105
+ exit(-1)
106
+ end
107
+ end
108
+ end
109
109
  end
@@ -1,44 +1,44 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Feb 2011
4
- # License:: MIT. Free, Open Source.
5
- #
6
- # REQUIRES: JRuby access to Java
7
- #
8
- # Usage::
9
- #
10
- # e.g. => jruby -S rake datashift:spree:products input=vendor/extensions/autotelik/fixtures/SiteSpreadsheetInfo.xls
11
- # => jruby -S rake datashift:spree:products input=C:\MyProducts.xls verbose=true
12
- #
13
- require 'datashift'
14
-
15
- namespace :datashift do
16
-
17
- namespace :spree do
18
-
19
- desc "Populate Spree Product/Variant data from .xls (Excel) or CSV file"
20
- task :products, [:input, :verbose, :sku_prefix] => :environment do |t, args|
21
-
22
- input = ENV['input']
23
-
24
- raise "USAGE: jruby -S rake datashift:spree:products input=excel_file.xls" unless input
25
- raise "ERROR: Could not find file #{args[:input]}" unless File.exists?(input)
26
-
27
- require 'product_loader'
28
-
29
- # COLUMNS WITH DEFAULTS - TODO create YAML configuration file to drive defaults etc
30
-
31
- loader = DataShift::ProductLoader.new
32
-
33
- loader.set_default_value('available_on', Time.now.to_s(:db) )
34
- loader.set_default_value('cost_price', 0.0 )
35
-
36
- loader.set_prefix('sku', args[:sku_prefix] ) if(args[:sku_prefix])
37
-
38
- puts "Loading from file: #{input}"
39
-
40
- loader.perform_load(input, :mandatory => ['sku', 'name', 'price'] )
41
- end
42
- end
43
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Feb 2011
4
+ # License:: MIT. Free, Open Source.
5
+ #
6
+ # REQUIRES: JRuby access to Java
7
+ #
8
+ # Usage::
9
+ #
10
+ # e.g. => jruby -S rake datashift:spree:products input=vendor/extensions/autotelik/fixtures/SiteSpreadsheetInfo.xls
11
+ # => jruby -S rake datashift:spree:products input=C:\MyProducts.xls verbose=true
12
+ #
13
+ require 'datashift'
14
+
15
+ namespace :datashift do
16
+
17
+ namespace :spree do
18
+
19
+ desc "Populate Spree Product/Variant data from .xls (Excel) or CSV file"
20
+ task :products, [:input, :verbose, :sku_prefix] => :environment do |t, args|
21
+
22
+ input = ENV['input']
23
+
24
+ raise "USAGE: jruby -S rake datashift:spree:products input=excel_file.xls" unless input
25
+ raise "ERROR: Could not find file #{args[:input]}" unless File.exists?(input)
26
+
27
+ require 'product_loader'
28
+
29
+ # COLUMNS WITH DEFAULTS - TODO create YAML configuration file to drive defaults etc
30
+
31
+ loader = DataShift::ProductLoader.new
32
+
33
+ loader.set_default_value('available_on', Time.now.to_s(:db) )
34
+ loader.set_default_value('cost_price', 0.0 )
35
+
36
+ loader.set_prefix('sku', args[:sku_prefix] ) if(args[:sku_prefix])
37
+
38
+ puts "Loading from file: #{input}"
39
+
40
+ loader.perform_load(input, :mandatory => ['sku', 'name', 'price'] )
41
+ end
42
+ end
43
+
44
44
  end
@@ -1,167 +1,167 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- #
5
- # License:: Free, OpenSource... MIT ?
6
- #
7
- # About:: Rake tasks to read Word documents, containing product descriptions,
8
- # convert to HTML, tidy the HTML and then create seed_fu ready fixtures,
9
- # from a template, with product description supplied by the HTML
10
- #
11
- # Note cleanest HTML is produced by this combination : saving with WdFormatHTML
12
- # not WdFormatFilteredHTML and using the '--word-2000', 'y' option to tidy
13
- # (don't use the '--bare' option)
14
- #
15
- # Not currently available for JRuby due to Win32Ole requirement
16
- #
17
- # Requires local exes available in PATH for :
18
- # Microsoft Word
19
- # HTML Tidy - http://tidy.sourceforge.net (Free)
20
- #
21
- require 'erb'
22
-
23
- namespace :datashift do
24
-
25
- desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
26
-
27
- task :word2html, [:help] => [:environment] do |t, args|
28
- x =<<-EOS
29
-
30
- USAGE::
31
- Convert MS Word docs to HTML and seed_fu fixtures, by default searches for docs
32
- in RAILS_ROOT/doc/copy
33
-
34
- You can change the directory where Word document files are located
35
- with the COPY_PATH environment variable.
36
-
37
- Examples:
38
- # default, to convert all Word files for the current environment
39
- rake datashift:word2seedfu
40
-
41
- # to load seed files matching orders or customers
42
- rake db:seed SEED=orders,customers
43
-
44
- # to load files from RAILS_ROOT/features/fixtures
45
- rake db:seed FIXTURE_PATH=features/fixtures
46
- EOS
47
-
48
- if(args[:help])
49
- puts x
50
- exit(0)
51
- end
52
-
53
- site_extension_lib = File.join(SiteExtension.root, 'lib')
54
-
55
- require File.join(site_extension_lib, 'word')
56
-
57
- copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
58
- fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
59
-
60
- copy_files = Dir[File.join(copy_path, '*.doc')]
61
-
62
- copy_files.each do |file|
63
-
64
- name = File.basename(file, '.doc')
65
-
66
- puts "\n== Generate raw HTML from #{name}.doc =="
67
-
68
- @word = Word.new(true)
69
-
70
- @word.open( file )
71
-
72
- html_file = File.join(copy_path, "#{name}.ms.html")
73
-
74
- @word.save_as_html( html_file )
75
-
76
- tidy_file = File.join(copy_path, "#{name}.html")
77
-
78
- tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
79
-
80
- puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
81
-
82
- result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
83
-
84
- # TODO maybe report on result, $?
85
-
86
- File.open( tidy_file ) do |f|
87
- puts f.read
88
- end
89
-
90
- @word.quit
91
- end
92
- end
93
-
94
- desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
95
- task :word2seedfu => :environment do
96
- site_extension_lib = File.join(SiteExtension.root, 'lib')
97
-
98
- require File.join(site_extension_lib, 'word')
99
-
100
- sku_id = ENV["INITIAL_SKU_ID"] ? ENV["INITIAL_SKU_ID"] : 0
101
- sku_prefix = ENV["SKU_PREFIX"] ? ENV["SKU_PREFIX"] : File.basename( RAILS_ROOT )
102
-
103
- seedfu_template = File.join(site_extension_lib, 'tasks', 'seed_fu_product_template.erb')
104
-
105
- begin
106
- File.open( seedfu_template ) do |f|
107
- @template = ERB.new(f.read)
108
- end
109
- rescue => e
110
- puts "ERROR: #{e.inspect}"
111
- puts "Cannot open or read template #{seedfu_template}"
112
- raise e
113
- end
114
-
115
- copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
116
- fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
117
-
118
- copy_files = Dir[File.join(copy_path, '*.doc')]
119
-
120
- copy_files.each do |file|
121
-
122
- name = File.basename(file, '.doc')
123
-
124
- puts "\n== Generate raw HTML from #{name}.doc =="
125
-
126
- @word = Word.new(true)
127
-
128
- @word.open( file )
129
-
130
- html_file = File.join(copy_path, "#{name}.ms.html")
131
-
132
- @word.save_as_html( html_file )
133
-
134
- tidy_file = File.join(copy_path, "#{name}.html")
135
-
136
- tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
137
-
138
- puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
139
-
140
- result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
141
-
142
- # TODO maybe report on result, $?
143
-
144
- File.open( tidy_file ) do |f|
145
- @description = f.read
146
- end
147
-
148
- sku_id_str = "%03d" % sku_id
149
-
150
- seed_file = "#{sku_id_str}_#{name.gsub(' ', '_')}.rb"
151
- puts "\n== Generate seed fu file #{seed_file} =="
152
-
153
- @sku = "#{sku_prefix}_#{sku_id_str}"
154
- @name = 'TODO'
155
-
156
- File.open( File.join(fixtures_path, seed_file), 'w' ) do |f|
157
- f.write @template.result(binding)
158
- puts "\nFile created: #{File.join(fixtures_path, seed_file)}"
159
- end
160
-
161
- sku_id += 1
162
-
163
- @word.quit
164
- end
165
-
166
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ #
5
+ # License:: Free, OpenSource... MIT ?
6
+ #
7
+ # About:: Rake tasks to read Word documents, containing product descriptions,
8
+ # convert to HTML, tidy the HTML and then create seed_fu ready fixtures,
9
+ # from a template, with product description supplied by the HTML
10
+ #
11
+ # Note cleanest HTML is produced by this combination : saving with WdFormatHTML
12
+ # not WdFormatFilteredHTML and using the '--word-2000', 'y' option to tidy
13
+ # (don't use the '--bare' option)
14
+ #
15
+ # Not currently available for JRuby due to Win32Ole requirement
16
+ #
17
+ # Requires local exes available in PATH for :
18
+ # Microsoft Word
19
+ # HTML Tidy - http://tidy.sourceforge.net (Free)
20
+ #
21
+ require 'erb'
22
+
23
+ namespace :datashift do
24
+
25
+ desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
26
+
27
+ task :word2html, [:help] => [:environment] do |t, args|
28
+ x =<<-EOS
29
+
30
+ USAGE::
31
+ Convert MS Word docs to HTML and seed_fu fixtures, by default searches for docs
32
+ in RAILS_ROOT/doc/copy
33
+
34
+ You can change the directory where Word document files are located
35
+ with the COPY_PATH environment variable.
36
+
37
+ Examples:
38
+ # default, to convert all Word files for the current environment
39
+ rake datashift:word2seedfu
40
+
41
+ # to load seed files matching orders or customers
42
+ rake db:seed SEED=orders,customers
43
+
44
+ # to load files from RAILS_ROOT/features/fixtures
45
+ rake db:seed FIXTURE_PATH=features/fixtures
46
+ EOS
47
+
48
+ if(args[:help])
49
+ puts x
50
+ exit(0)
51
+ end
52
+
53
+ site_extension_lib = File.join(SiteExtension.root, 'lib')
54
+
55
+ require File.join(site_extension_lib, 'word')
56
+
57
+ copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
58
+ fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
59
+
60
+ copy_files = Dir[File.join(copy_path, '*.doc')]
61
+
62
+ copy_files.each do |file|
63
+
64
+ name = File.basename(file, '.doc')
65
+
66
+ puts "\n== Generate raw HTML from #{name}.doc =="
67
+
68
+ @word = Word.new(true)
69
+
70
+ @word.open( file )
71
+
72
+ html_file = File.join(copy_path, "#{name}.ms.html")
73
+
74
+ @word.save_as_html( html_file )
75
+
76
+ tidy_file = File.join(copy_path, "#{name}.html")
77
+
78
+ tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
79
+
80
+ puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
81
+
82
+ result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
83
+
84
+ # TODO maybe report on result, $?
85
+
86
+ File.open( tidy_file ) do |f|
87
+ puts f.read
88
+ end
89
+
90
+ @word.quit
91
+ end
92
+ end
93
+
94
+ desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
95
+ task :word2seedfu => :environment do
96
+ site_extension_lib = File.join(SiteExtension.root, 'lib')
97
+
98
+ require File.join(site_extension_lib, 'word')
99
+
100
+ sku_id = ENV["INITIAL_SKU_ID"] ? ENV["INITIAL_SKU_ID"] : 0
101
+ sku_prefix = ENV["SKU_PREFIX"] ? ENV["SKU_PREFIX"] : File.basename( RAILS_ROOT )
102
+
103
+ seedfu_template = File.join(site_extension_lib, 'tasks', 'seed_fu_product_template.erb')
104
+
105
+ begin
106
+ File.open( seedfu_template ) do |f|
107
+ @template = ERB.new(f.read)
108
+ end
109
+ rescue => e
110
+ puts "ERROR: #{e.inspect}"
111
+ puts "Cannot open or read template #{seedfu_template}"
112
+ raise e
113
+ end
114
+
115
+ copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
116
+ fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
117
+
118
+ copy_files = Dir[File.join(copy_path, '*.doc')]
119
+
120
+ copy_files.each do |file|
121
+
122
+ name = File.basename(file, '.doc')
123
+
124
+ puts "\n== Generate raw HTML from #{name}.doc =="
125
+
126
+ @word = Word.new(true)
127
+
128
+ @word.open( file )
129
+
130
+ html_file = File.join(copy_path, "#{name}.ms.html")
131
+
132
+ @word.save_as_html( html_file )
133
+
134
+ tidy_file = File.join(copy_path, "#{name}.html")
135
+
136
+ tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
137
+
138
+ puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
139
+
140
+ result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
141
+
142
+ # TODO maybe report on result, $?
143
+
144
+ File.open( tidy_file ) do |f|
145
+ @description = f.read
146
+ end
147
+
148
+ sku_id_str = "%03d" % sku_id
149
+
150
+ seed_file = "#{sku_id_str}_#{name.gsub(' ', '_')}.rb"
151
+ puts "\n== Generate seed fu file #{seed_file} =="
152
+
153
+ @sku = "#{sku_prefix}_#{sku_id_str}"
154
+ @name = 'TODO'
155
+
156
+ File.open( File.join(fixtures_path, seed_file), 'w' ) do |f|
157
+ f.write @template.result(binding)
158
+ puts "\nFile created: #{File.join(fixtures_path, seed_file)}"
159
+ end
160
+
161
+ sku_id += 1
162
+
163
+ @word.quit
164
+ end
165
+
166
+ end
167
167
  end