datashift 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. data/.document +5 -5
  2. data/LICENSE.txt +26 -26
  3. data/README.markdown +326 -305
  4. data/README.rdoc +19 -19
  5. data/Rakefile +86 -93
  6. data/VERSION +1 -1
  7. data/datashift.gemspec +163 -152
  8. data/lib/applications/jruby/jexcel_file.rb +410 -408
  9. data/lib/applications/jruby/word.rb +79 -79
  10. data/lib/datashift.rb +183 -152
  11. data/lib/datashift/exceptions.rb +11 -11
  12. data/lib/datashift/file_definitions.rb +353 -353
  13. data/lib/datashift/mapping_file_definitions.rb +87 -87
  14. data/lib/datashift/method_detail.rb +293 -275
  15. data/lib/datashift/method_dictionary.rb +208 -209
  16. data/lib/datashift/method_mapper.rb +90 -90
  17. data/lib/datashift/model_mapper.rb +27 -0
  18. data/lib/exporters/csv_exporter.rb +36 -0
  19. data/lib/exporters/excel_exporter.rb +116 -0
  20. data/lib/exporters/exporter_base.rb +15 -0
  21. data/lib/generators/csv_generator.rb +36 -36
  22. data/lib/generators/excel_generator.rb +106 -122
  23. data/lib/generators/generator_base.rb +13 -13
  24. data/lib/helpers/core_ext/to_b.rb +24 -24
  25. data/lib/helpers/rake_utils.rb +42 -0
  26. data/lib/helpers/spree_helper.rb +194 -153
  27. data/lib/java/poi-3.7/LICENSE +507 -507
  28. data/lib/java/poi-3.7/NOTICE +21 -21
  29. data/lib/java/poi-3.7/RELEASE_NOTES.txt +115 -115
  30. data/lib/loaders/csv_loader.rb +98 -98
  31. data/lib/loaders/excel_loader.rb +155 -155
  32. data/lib/loaders/loader_base.rb +420 -420
  33. data/lib/loaders/spreadsheet_loader.rb +136 -136
  34. data/lib/loaders/spree/image_loader.rb +67 -63
  35. data/lib/loaders/spree/product_loader.rb +289 -248
  36. data/lib/thor/generate_excel.thor +54 -0
  37. data/sandbox/app/controllers/application_controller.rb +3 -0
  38. data/sandbox/config/application.rb +43 -0
  39. data/sandbox/config/database.yml +34 -0
  40. data/sandbox/config/environment.rb +7 -0
  41. data/sandbox/config/environments/development.rb +30 -0
  42. data/spec/csv_loader_spec.rb +30 -30
  43. data/spec/datashift_spec.rb +26 -26
  44. data/spec/db/migrate/20110803201325_create_test_bed.rb +85 -85
  45. data/spec/excel_exporter_spec.rb +78 -78
  46. data/spec/excel_generator_spec.rb +78 -78
  47. data/spec/excel_loader_spec.rb +223 -223
  48. data/spec/file_definitions.rb +141 -141
  49. data/spec/fixtures/ProjectsDefaults.yml +29 -29
  50. data/spec/fixtures/config/database.yml +27 -27
  51. data/spec/fixtures/datashift_Spree_db.sqlite +0 -0
  52. data/spec/fixtures/datashift_test_models_db.sqlite +0 -0
  53. data/spec/fixtures/negative/SpreeProdMiss1Mandatory.csv +4 -4
  54. data/spec/fixtures/negative/SpreeProdMissManyMandatory.csv +4 -4
  55. data/spec/fixtures/spree/SpreeProducts.csv +4 -4
  56. data/spec/fixtures/spree/SpreeProducts.xls +0 -0
  57. data/spec/fixtures/spree/SpreeProductsMultiColumn.csv +4 -4
  58. data/spec/fixtures/spree/SpreeProductsMultiColumn.xls +0 -0
  59. data/spec/fixtures/spree/SpreeProductsSimple.csv +4 -4
  60. data/spec/fixtures/spree/SpreeProductsWithImages.csv +4 -4
  61. data/spec/fixtures/spree/SpreeZoneExample.csv +5 -5
  62. data/spec/fixtures/test_model_defs.rb +57 -57
  63. data/spec/loader_spec.rb +120 -120
  64. data/spec/method_dictionary_spec.rb +242 -242
  65. data/spec/method_mapper_spec.rb +41 -41
  66. data/spec/spec_helper.rb +154 -116
  67. data/spec/spree_exporter_spec.rb +67 -0
  68. data/spec/spree_generator_spec.rb +77 -64
  69. data/spec/spree_loader_spec.rb +363 -324
  70. data/spec/spree_method_mapping_spec.rb +218 -214
  71. data/tasks/config/seed_fu_product_template.erb +15 -15
  72. data/tasks/config/tidy_config.txt +12 -12
  73. data/tasks/{excel_generator.rake → export/excel_generator.rake} +101 -78
  74. data/tasks/file_tasks.rake +36 -36
  75. data/tasks/import/csv.rake +50 -49
  76. data/tasks/import/excel.rake +74 -71
  77. data/tasks/spree/image_load.rake +108 -108
  78. data/tasks/spree/product_loader.rake +43 -43
  79. data/tasks/word_to_seedfu.rake +166 -166
  80. data/test/helper.rb +18 -18
  81. data/test/test_interact.rb +7 -7
  82. metadata +16 -8
  83. data/datashift-0.1.0.gem +0 -0
  84. data/tasks/db_tasks.rake +0 -66
@@ -1,109 +1,109 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Feb 2011
4
- # License:: MIT
5
- #
6
- # Usage::
7
- #
8
- # => rake datashift:spree:images input=vendor/extensions/site/fixtures/images
9
- # => rake datashift:spree:images input=C:\images\photos large dummy=true
10
- #
11
- # => rake datashift:spree:images input=C:\images\taxon_icons skip_if_no_assoc=true klass=Taxon
12
- #
13
- namespace :datashift do
14
-
15
- namespace :spree do
16
-
17
- desc "Populate the DB with images.\nDefault location db/image_seeds, or specify :input=<path> or dir under db/image_seeds with :folder"
18
- # :dummy => dummy run without actual saving to DB
19
- task :images, [:input, :folder, :dummy, :sku, :skip_if_no_assoc, :skip_if_loaded, :model] => :environment do |t, args|
20
-
21
- require 'image_loader'
22
-
23
- raise "USAGE: Please specify one of :input or :folder" if(args[:input] && args[:folder])
24
- puts "SKU not specified " if(args[:input] && args[:folder])
25
-
26
- if args[:input]
27
- @image_cache = args[:input]
28
- else
29
- @image_cache = File.join(Rails.root, "db", "image_seeds")
30
- @image_cache = File.join(@image_cache, args[:folder]) if(args[:folder])
31
- end
32
-
33
- attachment_klazz = Product
34
-
35
- begin
36
- attachment_klazz = Kernel.const_get(args[:model]) if(args[:model])
37
- rescue NameError
38
- attachment_klazz = Product
39
- end
40
-
41
- image_loader = ImageLoader.new
42
-
43
- if(File.exists? @image_cache )
44
- puts "Loading images from #{@image_cache}"
45
-
46
- missing_records = []
47
- Dir.glob("#{@image_cache}/*.{jpg,png,gif}") do |image_name|
48
-
49
- puts "Processing #{image_name} : #{File.exists?(image_name)}"
50
- base_name = File.basename(image_name, '.*')
51
-
52
- record = nil
53
- if(attachment_klazz == Product && args[:sku])
54
- sku = base_name.slice!(/\w+/)
55
- sku.strip!
56
- base_name.strip!
57
-
58
- puts "Looking fo SKU #{sku}"
59
- record = Variant.find_by_sku(sku)
60
- if record
61
- record = record.product # SKU stored on Variant but we want it's master Product
62
- else
63
- puts "Looking for NAME [#{base_name}]"
64
- record = attachment_klazz.find_by_name(base_name)
65
- end
66
- else
67
- puts "Looking for #{attachment_klazz.name} with NAME [#{base_name}]"
68
- record = attachment_klazz.find_by_name(base_name)
69
- end
70
-
71
- if(record)
72
- puts "Found record for attachment : #{record.inspect}"
73
- exists = record.images.detect {|i| puts "COMPARE #{i.attachment_file_name} => #{image_name}"; i.attachment_file_name == image_name }
74
- puts "Found existing attachments [#{exists}]" unless(exists.nil?)
75
- if(args[:skip_if_loaded] && !exists.nil?)
76
- puts "Skipping - Image #{image_name} already loaded for #{attachment_klazz}"
77
- next
78
- end
79
- else
80
- missing_records << image_name
81
- end
82
-
83
- # Now do actual upload to DB unless we are doing a dummy run,
84
- # or the Image must have an associated record
85
- unless(args[:dummy] == 'true' || (args[:skip_if_no_assoc] && record.nil?))
86
- image_loader.reset()
87
- puts "Process Image"
88
- image_loader.process( image_name, record )
89
- end
90
-
91
- end
92
-
93
- unless missing_records.empty?
94
- FileUtils.mkdir_p('MissingRecords') unless File.directory?('MissingRecords')
95
-
96
- puts '\nMISSING Records Report>>'
97
- missing_records.each do |i|
98
- puts "Copy #{i} to MissingRecords folder"
99
- FileUtils.cp( i, 'MissingRecords') unless(args[:dummy] == 'true')
100
- end
101
- end
102
-
103
- else
104
- puts "ERROR: Supplied Path #{@image_cache} not accesible"
105
- exit(-1)
106
- end
107
- end
108
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Feb 2011
4
+ # License:: MIT
5
+ #
6
+ # Usage::
7
+ #
8
+ # => rake datashift:spree:images input=vendor/extensions/site/fixtures/images
9
+ # => rake datashift:spree:images input=C:\images\photos large dummy=true
10
+ #
11
+ # => rake datashift:spree:images input=C:\images\taxon_icons skip_if_no_assoc=true model=Taxon
12
+ #
13
+ namespace :datashift do
14
+
15
+ namespace :spree do
16
+
17
+ desc "Populate the DB with images.\nDefault location db/image_seeds, or specify :input=<path> or dir under db/image_seeds with :folder"
18
+ # :dummy => dummy run without actual saving to DB
19
+ task :images, [:input, :folder, :dummy, :sku, :skip_if_no_assoc, :skip_if_loaded, :model] => :environment do |t, args|
20
+
21
+ require 'image_loader'
22
+
23
+ raise "USAGE: Please specify one of :input or :folder" if(args[:input] && args[:folder])
24
+ puts "SKU not specified " if(args[:input] && args[:folder])
25
+
26
+ if args[:input]
27
+ @image_cache = args[:input]
28
+ else
29
+ @image_cache = File.join(Rails.root, "db", "image_seeds")
30
+ @image_cache = File.join(@image_cache, args[:folder]) if(args[:folder])
31
+ end
32
+
33
+ attachment_klazz = Product
34
+
35
+ begin
36
+ attachment_klazz = Kernel.const_get(args[:model]) if(args[:model])
37
+ rescue NameError
38
+ attachment_klazz = Product
39
+ end
40
+
41
+ image_loader = ImageLoader.new
42
+
43
+ if(File.exists? @image_cache )
44
+ puts "Loading images from #{@image_cache}"
45
+
46
+ missing_records = []
47
+ Dir.glob("#{@image_cache}/*.{jpg,png,gif}") do |image_name|
48
+
49
+ puts "Processing #{image_name} : #{File.exists?(image_name)}"
50
+ base_name = File.basename(image_name, '.*')
51
+
52
+ record = nil
53
+ if(attachment_klazz == Product && args[:sku])
54
+ sku = base_name.slice!(/\w+/)
55
+ sku.strip!
56
+ base_name.strip!
57
+
58
+ puts "Looking fo SKU #{sku}"
59
+ record = Variant.find_by_sku(sku)
60
+ if record
61
+ record = record.product # SKU stored on Variant but we want its master Product
62
+ else
63
+ puts "Looking for NAME [#{base_name}]"
64
+ record = attachment_klazz.find_by_name(base_name)
65
+ end
66
+ else
67
+ puts "Looking for #{attachment_klazz.name} with NAME [#{base_name}]"
68
+ record = attachment_klazz.find_by_name(base_name)
69
+ end
70
+
71
+ if(record)
72
+ puts "Found record for attachment : #{record.inspect}"
73
+ exists = record.images.detect {|i| puts "COMPARE #{i.attachment_file_name} => #{image_name}"; i.attachment_file_name == image_name }
74
+ puts "Found existing attachments [#{exists}]" unless(exists.nil?)
75
+ if(args[:skip_if_loaded] && !exists.nil?)
76
+ puts "Skipping - Image #{image_name} already loaded for #{attachment_klazz}"
77
+ next
78
+ end
79
+ else
80
+ missing_records << image_name
81
+ end
82
+
83
+ # Now do the actual upload to the DB, unless this is a dummy run,
84
+ # or skip_if_no_assoc is set and no associated record was found
85
+ unless(args[:dummy] == 'true' || (args[:skip_if_no_assoc] && record.nil?))
86
+ image_loader.reset()
87
+ puts "Process Image"
88
+ image_loader.process( image_name, record )
89
+ end
90
+
91
+ end
92
+
93
+ unless missing_records.empty?
94
+ FileUtils.mkdir_p('MissingRecords') unless File.directory?('MissingRecords')
95
+
96
+ puts '\nMISSING Records Report>>'
97
+ missing_records.each do |i|
98
+ puts "Copy #{i} to MissingRecords folder"
99
+ FileUtils.cp( i, 'MissingRecords') unless(args[:dummy] == 'true')
100
+ end
101
+ end
102
+
103
+ else
104
+ puts "ERROR: Supplied Path #{@image_cache} not accesible"
105
+ exit(-1)
106
+ end
107
+ end
108
+ end
109
109
  end
@@ -1,44 +1,44 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Feb 2011
4
- # License:: MIT. Free, Open Source.
5
- #
6
- # REQUIRES: JRuby access to Java
7
- #
8
- # Usage::
9
- #
10
- # e.g. => jruby -S rake datashift:spree:products input=vendor/extensions/autotelik/fixtures/SiteSpreadsheetInfo.xls
11
- # => jruby -S rake datashift:spree:products input=C:\MyProducts.xls verbose=true
12
- #
13
- require 'datashift'
14
-
15
- namespace :datashift do
16
-
17
- namespace :spree do
18
-
19
- desc "Populate Spree Product/Variant data from .xls (Excel) or CSV file"
20
- task :products, [:input, :verbose, :sku_prefix] => :environment do |t, args|
21
-
22
- input = ENV['input']
23
-
24
- raise "USAGE: jruby -S rake datashift:spree:products input=excel_file.xls" unless input
25
- raise "ERROR: Could not find file #{args[:input]}" unless File.exists?(input)
26
-
27
- require 'product_loader'
28
-
29
- # COLUMNS WITH DEFAULTS - TODO create YAML configuration file to drive defaults etc
30
-
31
- loader = DataShift::ProductLoader.new
32
-
33
- loader.set_default_value('available_on', Time.now.to_s(:db) )
34
- loader.set_default_value('cost_price', 0.0 )
35
-
36
- loader.set_prefix('sku', args[:sku_prefix] ) if(args[:sku_prefix])
37
-
38
- puts "Loading from file: #{input}"
39
-
40
- loader.perform_load(input, :mandatory => ['sku', 'name', 'price'] )
41
- end
42
- end
43
-
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Feb 2011
4
+ # License:: MIT. Free, Open Source.
5
+ #
6
+ # REQUIRES: JRuby access to Java
7
+ #
8
+ # Usage::
9
+ #
10
+ # e.g. => jruby -S rake datashift:spree:products input=vendor/extensions/autotelik/fixtures/SiteSpreadsheetInfo.xls
11
+ # => jruby -S rake datashift:spree:products input=C:\MyProducts.xls verbose=true
12
+ #
13
+ require 'datashift'
14
+
15
+ namespace :datashift do
16
+
17
+ namespace :spree do
18
+
19
+ desc "Populate Spree Product/Variant data from .xls (Excel) or CSV file"
20
+ task :products, [:input, :verbose, :sku_prefix] => :environment do |t, args|
21
+
22
+ input = ENV['input']
23
+
24
+ raise "USAGE: jruby -S rake datashift:spree:products input=excel_file.xls" unless input
25
+ raise "ERROR: Could not find file #{args[:input]}" unless File.exists?(input)
26
+
27
+ require 'product_loader'
28
+
29
+ # COLUMNS WITH DEFAULTS - TODO create YAML configuration file to drive defaults etc
30
+
31
+ loader = DataShift::ProductLoader.new
32
+
33
+ loader.set_default_value('available_on', Time.now.to_s(:db) )
34
+ loader.set_default_value('cost_price', 0.0 )
35
+
36
+ loader.set_prefix('sku', args[:sku_prefix] ) if(args[:sku_prefix])
37
+
38
+ puts "Loading from file: #{input}"
39
+
40
+ loader.perform_load(input, :mandatory => ['sku', 'name', 'price'] )
41
+ end
42
+ end
43
+
44
44
  end
@@ -1,167 +1,167 @@
1
- # Copyright:: (c) Autotelik Media Ltd 2011
2
- # Author :: Tom Statter
3
- # Date :: Aug 2010
4
- #
5
- # License:: Free, OpenSource... MIT ?
6
- #
7
- # About:: Rake tasks to read Word documents, containing product descriptions,
8
- # convert to HTML, tidy the HTML and then create seed_fu ready fixtures,
9
- # from a template, with product description supplied by the HTML
10
- #
11
- # Note cleanest HTML is produced by this combination : saving with WdFormatHTML
12
- # not WdFormatFilteredHTML and using the '--word-2000', 'y' option to tidy
13
- # (don't use the '--bare' option)
14
- #
15
- # Not currently available for JRuby due to Win32Ole requirement
16
- #
17
- # Requires local exes available in PATH for :
18
- # Microsoft Word
19
- # HTML Tidy - http://tidy.sourceforge.net (Free)
20
- #
21
- require 'erb'
22
-
23
- namespace :datashift do
24
-
25
- desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
26
-
27
- task :word2html, [:help] => [:environment] do |t, args|
28
- x =<<-EOS
29
-
30
- USAGE::
31
- Convert MS Word docs to HTML and seed_fu fixtures, by default searches for docs
32
- in RAILS_ROOT/doc/copy
33
-
34
- You can change the directory where Word document files are located
35
- with the COPY_PATH environment variable.
36
-
37
- Examples:
38
- # default, to convert all Word files for the current environment
39
- rake datashift:word2seedfu
40
-
41
- # to load seed files matching orders or customers
42
- rake db:seed SEED=orders,customers
43
-
44
- # to load files from RAILS_ROOT/features/fixtures
45
- rake db:seed FIXTURE_PATH=features/fixtures
46
- EOS
47
-
48
- if(args[:help])
49
- puts x
50
- exit(0)
51
- end
52
-
53
- site_extension_lib = File.join(SiteExtension.root, 'lib')
54
-
55
- require File.join(site_extension_lib, 'word')
56
-
57
- copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
58
- fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
59
-
60
- copy_files = Dir[File.join(copy_path, '*.doc')]
61
-
62
- copy_files.each do |file|
63
-
64
- name = File.basename(file, '.doc')
65
-
66
- puts "\n== Generate raw HTML from #{name}.doc =="
67
-
68
- @word = Word.new(true)
69
-
70
- @word.open( file )
71
-
72
- html_file = File.join(copy_path, "#{name}.ms.html")
73
-
74
- @word.save_as_html( html_file )
75
-
76
- tidy_file = File.join(copy_path, "#{name}.html")
77
-
78
- tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
79
-
80
- puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
81
-
82
- result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
83
-
84
- # TODO maybe report on result, $?
85
-
86
- File.open( tidy_file ) do |f|
87
- puts f.read
88
- end
89
-
90
- @word.quit
91
- end
92
- end
93
-
94
- desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
95
- task :word2seedfu => :environment do
96
- site_extension_lib = File.join(SiteExtension.root, 'lib')
97
-
98
- require File.join(site_extension_lib, 'word')
99
-
100
- sku_id = ENV["INITIAL_SKU_ID"] ? ENV["INITIAL_SKU_ID"] : 0
101
- sku_prefix = ENV["SKU_PREFIX"] ? ENV["SKU_PREFIX"] : File.basename( RAILS_ROOT )
102
-
103
- seedfu_template = File.join(site_extension_lib, 'tasks', 'seed_fu_product_template.erb')
104
-
105
- begin
106
- File.open( seedfu_template ) do |f|
107
- @template = ERB.new(f.read)
108
- end
109
- rescue => e
110
- puts "ERROR: #{e.inspect}"
111
- puts "Cannot open or read template #{seedfu_template}"
112
- raise e
113
- end
114
-
115
- copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
116
- fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
117
-
118
- copy_files = Dir[File.join(copy_path, '*.doc')]
119
-
120
- copy_files.each do |file|
121
-
122
- name = File.basename(file, '.doc')
123
-
124
- puts "\n== Generate raw HTML from #{name}.doc =="
125
-
126
- @word = Word.new(true)
127
-
128
- @word.open( file )
129
-
130
- html_file = File.join(copy_path, "#{name}.ms.html")
131
-
132
- @word.save_as_html( html_file )
133
-
134
- tidy_file = File.join(copy_path, "#{name}.html")
135
-
136
- tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
137
-
138
- puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
139
-
140
- result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
141
-
142
- # TODO maybe report on result, $?
143
-
144
- File.open( tidy_file ) do |f|
145
- @description = f.read
146
- end
147
-
148
- sku_id_str = "%03d" % sku_id
149
-
150
- seed_file = "#{sku_id_str}_#{name.gsub(' ', '_')}.rb"
151
- puts "\n== Generate seed fu file #{seed_file} =="
152
-
153
- @sku = "#{sku_prefix}_#{sku_id_str}"
154
- @name = 'TODO'
155
-
156
- File.open( File.join(fixtures_path, seed_file), 'w' ) do |f|
157
- f.write @template.result(binding)
158
- puts "\nFile created: #{File.join(fixtures_path, seed_file)}"
159
- end
160
-
161
- sku_id += 1
162
-
163
- @word.quit
164
- end
165
-
166
- end
1
+ # Copyright:: (c) Autotelik Media Ltd 2011
2
+ # Author :: Tom Statter
3
+ # Date :: Aug 2010
4
+ #
5
+ # License:: Free, OpenSource... MIT ?
6
+ #
7
+ # About:: Rake tasks to read Word documents, containing product descriptions,
8
+ # convert to HTML, tidy the HTML and then create seed_fu ready fixtures,
9
+ # from a template, with product description supplied by the HTML
10
+ #
11
+ # Note cleanest HTML is produced by this combination : saving with WdFormatHTML
12
+ # not WdFormatFilteredHTML and using the '--word-2000', 'y' option to tidy
13
+ # (don't use the '--bare' option)
14
+ #
15
+ # Not currently available for JRuby due to Win32Ole requirement
16
+ #
17
+ # Requires local exes available in PATH for :
18
+ # Microsoft Word
19
+ # HTML Tidy - http://tidy.sourceforge.net (Free)
20
+ #
21
+ require 'erb'
22
+
23
+ namespace :datashift do
24
+
25
+ desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
26
+
27
+ task :word2html, [:help] => [:environment] do |t, args|
28
+ x =<<-EOS
29
+
30
+ USAGE::
31
+ Convert MS Word docs to HTML and seed_fu fixtures, by default searches for docs
32
+ in RAILS_ROOT/doc/copy
33
+
34
+ You can change the directory where Word document files are located
35
+ with the COPY_PATH environment variable.
36
+
37
+ Examples:
38
+ # default, to convert all Word files for the current environment
39
+ rake datashift:word2seedfu
40
+
41
+ # to load seed files matching orders or customers
42
+ rake db:seed SEED=orders,customers
43
+
44
+ # to load files from RAILS_ROOT/features/fixtures
45
+ rake db:seed FIXTURE_PATH=features/fixtures
46
+ EOS
47
+
48
+ if(args[:help])
49
+ puts x
50
+ exit(0)
51
+ end
52
+
53
+ site_extension_lib = File.join(SiteExtension.root, 'lib')
54
+
55
+ require File.join(site_extension_lib, 'word')
56
+
57
+ copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
58
+ fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
59
+
60
+ copy_files = Dir[File.join(copy_path, '*.doc')]
61
+
62
+ copy_files.each do |file|
63
+
64
+ name = File.basename(file, '.doc')
65
+
66
+ puts "\n== Generate raw HTML from #{name}.doc =="
67
+
68
+ @word = Word.new(true)
69
+
70
+ @word.open( file )
71
+
72
+ html_file = File.join(copy_path, "#{name}.ms.html")
73
+
74
+ @word.save_as_html( html_file )
75
+
76
+ tidy_file = File.join(copy_path, "#{name}.html")
77
+
78
+ tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
79
+
80
+ puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
81
+
82
+ result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
83
+
84
+ # TODO maybe report on result, $?
85
+
86
+ File.open( tidy_file ) do |f|
87
+ puts f.read
88
+ end
89
+
90
+ @word.quit
91
+ end
92
+ end
93
+
94
+ desc "Convert MS Word to HTML and seed_fu fixtures. help=true for detailed usage."
95
+ task :word2seedfu => :environment do
96
+ site_extension_lib = File.join(SiteExtension.root, 'lib')
97
+
98
+ require File.join(site_extension_lib, 'word')
99
+
100
+ sku_id = ENV["INITIAL_SKU_ID"] ? ENV["INITIAL_SKU_ID"] : 0
101
+ sku_prefix = ENV["SKU_PREFIX"] ? ENV["SKU_PREFIX"] : File.basename( RAILS_ROOT )
102
+
103
+ seedfu_template = File.join(site_extension_lib, 'tasks', 'seed_fu_product_template.erb')
104
+
105
+ begin
106
+ File.open( seedfu_template ) do |f|
107
+ @template = ERB.new(f.read)
108
+ end
109
+ rescue => e
110
+ puts "ERROR: #{e.inspect}"
111
+ puts "Cannot open or read template #{seedfu_template}"
112
+ raise e
113
+ end
114
+
115
+ copy_path = ENV["COPY_PATH"] ? ENV["COPY_PATH"] : File.join(RAILS_ROOT, "doc", "copy")
116
+ fixtures_path = ENV["FIXTURES_PATH"] ? ENV["FIXTURES_PATH"] : File.join(RAILS_ROOT, "db", "fixtures")
117
+
118
+ copy_files = Dir[File.join(copy_path, '*.doc')]
119
+
120
+ copy_files.each do |file|
121
+
122
+ name = File.basename(file, '.doc')
123
+
124
+ puts "\n== Generate raw HTML from #{name}.doc =="
125
+
126
+ @word = Word.new(true)
127
+
128
+ @word.open( file )
129
+
130
+ html_file = File.join(copy_path, "#{name}.ms.html")
131
+
132
+ @word.save_as_html( html_file )
133
+
134
+ tidy_file = File.join(copy_path, "#{name}.html")
135
+
136
+ tidy_config = File.join(site_extension_lib, 'tasks', 'tidy_config.txt')
137
+
138
+ puts "tidy cmd line:", "tidy -config #{tidy_config} -clean --show-body-only y --word-2000 y --indent-spaces 2 -output #{tidy_file} #{html_file}"
139
+
140
+ result = system("tidy", '-config', "#{tidy_config}", '-clean', '--show-body-only', 'y', '--word-2000', 'y', '--indent-spaces', '2', '-output', "#{tidy_file}", "#{html_file}")
141
+
142
+ # TODO maybe report on result, $?
143
+
144
+ File.open( tidy_file ) do |f|
145
+ @description = f.read
146
+ end
147
+
148
+ sku_id_str = "%03d" % sku_id
149
+
150
+ seed_file = "#{sku_id_str}_#{name.gsub(' ', '_')}.rb"
151
+ puts "\n== Generate seed fu file #{seed_file} =="
152
+
153
+ @sku = "#{sku_prefix}_#{sku_id_str}"
154
+ @name = 'TODO'
155
+
156
+ File.open( File.join(fixtures_path, seed_file), 'w' ) do |f|
157
+ f.write @template.result(binding)
158
+ puts "\nFile created: #{File.join(fixtures_path, seed_file)}"
159
+ end
160
+
161
+ sku_id += 1
162
+
163
+ @word.quit
164
+ end
165
+
166
+ end
167
167
  end