csv-import-analyzer 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +8 -1
- data/csv-import-analyzer.gemspec +1 -1
- data/lib/csv-import-analyzer.rb +6 -4
- data/lib/csv-import-analyzer/analyzer/csv_check_bounds.rb +30 -19
- data/lib/csv-import-analyzer/analyzer/delimiter_identifier.rb +44 -24
- data/lib/csv-import-analyzer/analyzer/file_type_assertion.rb +1 -5
- data/lib/csv-import-analyzer/csv_datatype_analysis.rb +25 -9
- data/lib/csv-import-analyzer/csv_sanitizer.rb +67 -17
- data/lib/csv-import-analyzer/export/metadata_analysis.rb +63 -7
- data/lib/csv-import-analyzer/helpers/common_functions.rb +4 -0
- data/lib/csv-import-analyzer/helpers/datatype_validation.rb +6 -6
- data/lib/csv-import-analyzer/helpers/string_class_extensions.rb +9 -3
- data/lib/csv-import-analyzer/query_builder/mysql_query_helper.rb +2 -2
- data/lib/csv-import-analyzer/query_builder/pg_query_helper.rb +1 -2
- data/lib/csv-import-analyzer/query_builder/query_helper.rb +2 -2
- data/lib/csv-import-analyzer/sql_query_builder.rb +27 -12
- data/lib/csv-import-analyzer/version.rb +1 -1
- data/spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb +8 -8
- data/spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb +13 -13
- data/spec/csv-import-analyzer/csv_sanitizer_spec.rb +10 -7
- data/spec/csv-import-analyzer/helpers/common_functions_spec.rb +20 -19
- data/spec/csv-import-analyzer/helpers/datatype_validation_spec.rb +28 -28
- data/spec/csv-import-analyzer/helpers/string_class_extension_spec.rb +6 -6
- data/spec/csv-import-analyzer/query_builder/mysql_query_helper_spec.rb +13 -13
- data/spec/csv-import-analyzer/query_builder/pg_query_helper_spec.rb +16 -16
- data/spec/csv-import-analyzer_spec.rb +3 -6
- data/spec/fixtures/sample.csv +2 -2
- data/spec/spec_helper.rb +3 -0
- metadata +17 -6
- data/lib/csv-import-analyzer/sampleTab.csv +0 -5
- data/samples/metadata_output.json +0 -70
- data/spec/csv-import-analyzer/csv_datatype_analysis_spec.rb +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7b62ce806e6c9ce5dbd5bb625cec61e9f62e861d
|
4
|
+
data.tar.gz: 097ec0f8a105b92a0adf7869f4e01d5330d62abc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0e7aff88f46560a7c263bee87f6840f2b918acd5003e0a3cd710a37910392d1452468d5fce3e8d0ee3aece44ab79f17c08da1aabab226ae70ef9bd58e9cf45c
|
7
|
+
data.tar.gz: 9778e7a00b0c972aaa45a6260a4a03b7ea699612c68740ab8acd10bbfc1369e4fc6f9e5c167a2b4936877c87c99a22df9965865d95cc760763541c6524f21a2f
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -34,10 +34,17 @@ Calling process on a filename would generate a metadata_output.json which has th
|
|
34
34
|
|
35
35
|
## TODO:
|
36
36
|
<ul>
|
37
|
+
<li> Handle control of processed input file to user </li>
|
37
38
|
<li> Return the analysis as Json object.</li>
|
38
|
-
<li> Structuring the analysis outputted to csv
|
39
|
+
<li> Better - Structuring the analysis outputted to csv</li>
|
40
|
+
<li> Add support to convert and import xlsx files to csv </li>
|
39
41
|
</ul>
|
40
42
|
|
43
|
+
## Additional Information
|
44
|
+
|
45
|
+
### Dependencies
|
46
|
+
<ul><li><a href="https://github.com/tilo/smarter_csv">smarter_csv</a> - For processing the csv in chunks</li></ul>
|
47
|
+
|
41
48
|
## Contributing
|
42
49
|
|
43
50
|
1. Fork it ( https://github.com/avinash-vllbh/csv-import-analyzer/fork )
|
data/csv-import-analyzer.gemspec
CHANGED
@@ -24,5 +24,5 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "rspec", "~> 3.0"
|
25
25
|
spec.add_development_dependency "simplecov", "~> 0.9"
|
26
26
|
|
27
|
-
spec.add_runtime_dependency "smarter_csv", "~> 1.0.17"
|
27
|
+
spec.add_runtime_dependency "smarter_csv", "~> 1.0", ">= 1.0.17"
|
28
28
|
end
|
data/lib/csv-import-analyzer.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
|
-
require 'pry'
|
2
1
|
require_relative "csv-import-analyzer/csv_sanitizer"
|
3
2
|
require_relative "csv-import-analyzer/helpers/errors"
|
4
3
|
module CsvImportAnalyzer
|
5
4
|
# To identify the methods in the module as class methods
|
6
5
|
extend self
|
7
6
|
|
7
|
+
###
|
8
|
+
# main public interface to the library
|
9
|
+
# makes sure that the file exists and
|
10
|
+
# passes the file and any additional options given to CsvSanitizer
|
11
|
+
# returns FileNotFound if given file is invalid
|
12
|
+
###
|
8
13
|
def process(filename, options = {})
|
9
14
|
if File::exist?(filename)
|
10
15
|
CsvImportAnalyzer::CsvSanitizer.new().process(File.absolute_path(filename), options)
|
@@ -13,6 +18,3 @@ module CsvImportAnalyzer
|
|
13
18
|
end
|
14
19
|
end
|
15
20
|
end
|
16
|
-
|
17
|
-
CsvImportAnalyzer.process("sampleTab.csv", {:metadata_output => true, :unique => 2})
|
18
|
-
# CsvImportAnalyzer.process("sampleTab.csv", {:metadata_output => true, :out_format => :csv})
|
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'smarter_csv'
|
2
|
-
require 'pry'
|
3
2
|
require_relative "../helpers/common_functions"
|
4
3
|
require_relative "../helpers/errors"
|
5
4
|
|
@@ -15,7 +14,6 @@ module CsvImportAnalyzer
|
|
15
14
|
@min_max_bounds = {}
|
16
15
|
@distinct_values = {}
|
17
16
|
@nullable = options[:nullable] || []
|
18
|
-
|
19
17
|
end
|
20
18
|
|
21
19
|
def filename
|
@@ -31,10 +29,14 @@ module CsvImportAnalyzer
|
|
31
29
|
@max_distinct_values ||= Integer(options[:unique]) + 1
|
32
30
|
end
|
33
31
|
|
34
|
-
# Public interface
|
32
|
+
# Public interface for CsvCheckBounds
|
33
|
+
# Processes the CSV file for min & max values and distinct values for each column
|
35
34
|
def get_min_max_values
|
36
35
|
unless filename.nil?
|
37
36
|
if File.exist?(filename)
|
37
|
+
# Using SmarterCSV gem to retrieve the csv records in chunks
|
38
|
+
# Chunk size can be set by the user
|
39
|
+
# E.g. :chunk_size => 200 would retrieve 200 rows each time
|
38
40
|
SmarterCSV.process(filename, {:col_sep => delimiter, :chunk_size => chunk_size,
|
39
41
|
:remove_empty_values => false, :remove_zero_values => false}) do |chunk|
|
40
42
|
chunk.each do |row|
|
@@ -59,9 +61,10 @@ module CsvImportAnalyzer
|
|
59
61
|
|
60
62
|
private
|
61
63
|
|
62
|
-
|
63
|
-
#If the key is of String type then we find the max length of it
|
64
|
-
|
64
|
+
###
|
65
|
+
# If the key is of String type then we find the max length of it
|
66
|
+
# Any other datatype would have a min and max ranges
|
67
|
+
###
|
65
68
|
def process_min_max_for_column(key, value)
|
66
69
|
if min_max_bounds[key].nil?
|
67
70
|
unless csv_column_datatypes[key] == :string
|
@@ -73,22 +76,30 @@ module CsvImportAnalyzer
|
|
73
76
|
add_bounds(key, value)
|
74
77
|
end
|
75
78
|
|
76
|
-
|
77
|
-
#Method
|
78
|
-
|
79
|
+
###
|
80
|
+
# Method to decide on the min max values for each key
|
81
|
+
# Checks for length if key is of String format
|
82
|
+
# Check for values if key is of Numeric or Datetime format
|
83
|
+
###
|
79
84
|
def add_bounds(key, value)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
85
|
+
begin
|
86
|
+
if csv_column_datatypes[key] == :string
|
87
|
+
min_max_bounds[key][:min] = value.length if value.length < min_max_bounds[key][:min]
|
88
|
+
min_max_bounds[key][:max] = value.length if value.length > min_max_bounds[key][:max]
|
89
|
+
else
|
90
|
+
min_max_bounds[key][:min] = value if value < min_max_bounds[key][:min]
|
91
|
+
min_max_bounds[key][:max] = value if value > min_max_bounds[key][:max]
|
92
|
+
end
|
93
|
+
rescue ArgumentError, NoMethodError => e
|
94
|
+
###
|
95
|
+
# TODO: Handle csv parse conversions of datatypes
|
96
|
+
###
|
86
97
|
end
|
87
98
|
end
|
88
99
|
|
89
|
-
|
90
|
-
#Processes the max number of distinct values set for each column
|
91
|
-
|
100
|
+
###
|
101
|
+
# Processes the max number of distinct values set for each column
|
102
|
+
###
|
92
103
|
def process_distinct_values(key, value)
|
93
104
|
if distinct_values[key].nil?
|
94
105
|
distinct_values[key] = [value]
|
@@ -101,4 +112,4 @@ module CsvImportAnalyzer
|
|
101
112
|
end
|
102
113
|
|
103
114
|
end
|
104
|
-
end
|
115
|
+
end
|
@@ -1,39 +1,31 @@
|
|
1
1
|
require_relative "../helpers/string_class_extensions"
|
2
2
|
require 'pry'
|
3
|
-
|
4
3
|
module CsvImportAnalyzer
|
5
4
|
module DelimiterIdentifier
|
6
5
|
|
7
|
-
|
8
|
-
|
6
|
+
###
|
7
|
+
# Types of delimiters that the gem has to lookout for.
|
8
|
+
# Could be changed in future or to custom delimiters
|
9
|
+
# returns a @delimiter instance variable array
|
10
|
+
###
|
9
11
|
def delimiter
|
10
12
|
@delimiter ||= [",", ";", "\t", "|"]
|
11
13
|
end
|
12
14
|
|
15
|
+
###
|
16
|
+
# Routine to initialize the delimiter_count hash with the delimiters defined above with a base count of 0
|
17
|
+
# Returns @delimiter_count instance variable
|
18
|
+
###
|
13
19
|
def delimiter_count
|
14
20
|
@delimiter_count ||= Hash[delimiter.map {|v| [v,0]}]
|
15
21
|
@delimiter_count
|
16
22
|
end
|
17
23
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
def count_occurances_delimiter(line)
|
24
|
-
delimiter_count.keys.each do |key|
|
25
|
-
#Count the occurances of delimiter in a line
|
26
|
-
total_count_delimiter = line.substr_count(key)
|
27
|
-
#count the occurances of delimiter between quotes inside a line to disregard them
|
28
|
-
quoted_delimiter_count = getting_contents_of_quoted_values(line).substr_count(key)
|
29
|
-
delimiter_count[key] += total_count_delimiter - quoted_delimiter_count
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def return_plausible_delimiter
|
34
|
-
return delimiter_count.key(delimiter_count.values.max)
|
35
|
-
end
|
36
|
-
|
24
|
+
###
|
25
|
+
# Method to analyze input data and determine delimiter
|
26
|
+
# Input can be either a csv file or even a array of strings
|
27
|
+
# returns delimiter
|
28
|
+
###
|
37
29
|
def identify_delimiter(filename_or_sample)
|
38
30
|
#filename_or_sample input can be either a File or an Array or a string - Return delimiter for File or an Array of strings (if found)
|
39
31
|
if filename_or_sample.class == String
|
@@ -60,7 +52,35 @@ module CsvImportAnalyzer
|
|
60
52
|
InvalidInput.new
|
61
53
|
end
|
62
54
|
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def getting_contents_of_quoted_values(input)
|
59
|
+
#return a join of all the strings inside quotes inside a line
|
60
|
+
input.scan(/".*?"/).join
|
61
|
+
end
|
62
|
+
|
63
|
+
###
|
64
|
+
# Find the count of delimiter occurrences in a line
|
65
|
+
# CSV files can have delimiters escaped between quotes
|
66
|
+
# valid count = total_count - delimiters inside quotes
|
67
|
+
###
|
68
|
+
def count_occurances_delimiter(line)
|
69
|
+
delimiter_count.keys.each do |key|
|
70
|
+
#Count the occurances of delimiter in a line
|
71
|
+
total_count_delimiter = line.substr_count(key)
|
72
|
+
#count the occurances of delimiter between quotes inside a line to disregard them
|
73
|
+
quoted_delimiter_count = getting_contents_of_quoted_values(line).substr_count(key)
|
74
|
+
delimiter_count[key] += total_count_delimiter - quoted_delimiter_count
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
###
|
79
|
+
# Plausible delimiter would be the one with the most occurrences in the set of rows
|
80
|
+
###
|
81
|
+
def return_plausible_delimiter
|
82
|
+
return delimiter_count.key(delimiter_count.values.max)
|
83
|
+
end
|
84
|
+
|
63
85
|
end
|
64
86
|
end
|
65
|
-
|
66
|
-
# puts CsvImportAnalyzer::DelimiterIdentifier.identify_delimiter("/home/avinash/Desktop/csv-import-analyzer/spec/fixtures/sample.csv")
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require "smarter_csv"
|
2
2
|
require "tempfile"
|
3
|
-
require "pry"
|
4
3
|
require_relative "helpers/datatype_validation"
|
5
4
|
require_relative "analyzer/csv_check_bounds"
|
6
5
|
require_relative "helpers/common_functions"
|
7
6
|
require_relative "sql_query_builder"
|
7
|
+
require "pry"
|
8
8
|
|
9
9
|
module CsvImportAnalyzer
|
10
10
|
class CsvDatatypeAnalysis
|
@@ -27,8 +27,12 @@ module CsvImportAnalyzer
|
|
27
27
|
@options[:filename]
|
28
28
|
end
|
29
29
|
|
30
|
-
|
30
|
+
###
|
31
31
|
# Process a chunk of csv file for all possible datatypes towards each column in the row
|
32
|
+
# This datatype analysis is used for analyzing,
|
33
|
+
# Min - Max values of each column
|
34
|
+
# Distinct values of each column
|
35
|
+
# Enumeration eligibility
|
32
36
|
def datatype_analysis
|
33
37
|
SmarterCSV.process(filename, {:col_sep => delimiter, :chunk_size => chunk_size,
|
34
38
|
:remove_empty_values => false, :remove_zero_values => false}) do |chunk|
|
@@ -61,14 +65,18 @@ module CsvImportAnalyzer
|
|
61
65
|
return options[:chunk]
|
62
66
|
end
|
63
67
|
|
64
|
-
|
65
|
-
#
|
66
|
-
#
|
68
|
+
###
|
69
|
+
# Call DatatypeValidator in helper module to process the possible datatype for the value
|
70
|
+
# Is this the right way to hide dependency on the external classes or objects
|
71
|
+
# May be a static would do ? Should I create an object and call method on the object each time rather than instantiate a new object each time ??
|
72
|
+
###
|
67
73
|
def determine_dataype(value)
|
68
74
|
return validate_field(value)
|
69
75
|
end
|
70
76
|
|
77
|
+
###
|
71
78
|
# Build the hash of hashes which hold the count of different possible datatypes for each row
|
79
|
+
###
|
72
80
|
def add_to_datatype(key, datatype)
|
73
81
|
if csv_column_datatypes[key].nil?
|
74
82
|
csv_column_datatypes[key] = {datatype => 1}
|
@@ -81,8 +89,11 @@ module CsvImportAnalyzer
|
|
81
89
|
end
|
82
90
|
end
|
83
91
|
|
84
|
-
|
85
|
-
#
|
92
|
+
###
|
93
|
+
# Finalize the datatype for each column.
|
94
|
+
# A column datatype would be set to varchar or string if even one of its values tends to be a string
|
95
|
+
# If the column doesn't have any possible strings then assign the datatype to column with maximum count of identified possibilities
|
96
|
+
###
|
86
97
|
def finalize_datatypes_for_csv
|
87
98
|
csv_column_datatypes.map { |column_name, possible_datatypes|
|
88
99
|
#If there is string type even atleast 1 there is no other option but to set the datatype to string => varchar
|
@@ -95,7 +106,12 @@ module CsvImportAnalyzer
|
|
95
106
|
}
|
96
107
|
end
|
97
108
|
|
98
|
-
|
109
|
+
###
|
110
|
+
# Decide if simple datatype analysis is enough or proceed further
|
111
|
+
# Proceed further would be to
|
112
|
+
# Identify min and max bounds for each column
|
113
|
+
# Identify if the number of distinct values is less than the set threshold
|
114
|
+
###
|
99
115
|
def take_further_actions
|
100
116
|
if options[:check_bounds]
|
101
117
|
min_max_bounds = CsvImportAnalyzer::CsvCheckBounds.new(options)
|
@@ -107,4 +123,4 @@ module CsvImportAnalyzer
|
|
107
123
|
query.generate_query
|
108
124
|
end
|
109
125
|
end
|
110
|
-
end
|
126
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "smarter_csv"
|
2
2
|
require "tempfile"
|
3
|
+
require "pry"
|
3
4
|
require_relative "analyzer/delimiter_identifier"
|
4
5
|
require_relative "helpers/string_class_extensions"
|
5
6
|
require_relative "helpers/common_functions"
|
@@ -11,27 +12,46 @@ module CsvImportAnalyzer
|
|
11
12
|
include CsvImportAnalyzer::Helper
|
12
13
|
include CsvImportAnalyzer::DelimiterIdentifier
|
13
14
|
|
15
|
+
###
|
16
|
+
# Public interface for the entire library
|
17
|
+
# What it does:
|
18
|
+
# Sets "options" variable by merging default values and passed values
|
19
|
+
# Finds the delimiter by analyzing a sample
|
20
|
+
# Sanitizes or preprocesses the csv file by creating a temporary processed file
|
21
|
+
# Replacing null and empty values with NULL
|
22
|
+
# Replace single quotes with double quotes if needed
|
23
|
+
# Handle CSVMalformedError by logging the error to error report
|
24
|
+
# Passes the options to DatatypeAnalysis
|
25
|
+
###
|
14
26
|
def process(filename, options)
|
15
|
-
|
16
27
|
options = defaults.merge(options)
|
17
28
|
if File.exist?(filename)
|
18
|
-
options[:filename] = filename
|
19
|
-
#first thing to do - find the delimiter of the file.
|
20
29
|
delimiter = identify_delimiter(filename)
|
21
30
|
options[:delimiter] = delimiter
|
31
|
+
# create tempfiles to update any changes being made
|
32
|
+
temp_file, processed_file = create_tempfiles(filename, options)
|
33
|
+
options[:temp_file] = temp_file.path
|
34
|
+
line_count = 1
|
22
35
|
File.foreach(filename) do |line|
|
23
|
-
#Check if the line is empty - no point in processing empty lines
|
24
|
-
if line.length > 1
|
36
|
+
if line.length > 1 #Check if the line is empty - no point in processing empty lines
|
25
37
|
line = replace_line_single_quotes(line,delimiter)
|
26
38
|
begin
|
27
39
|
line = CSV.parse_line(line, {:col_sep => delimiter})
|
28
|
-
rescue CSV::MalformedCSVError
|
29
|
-
|
40
|
+
rescue CSV::MalformedCSVError
|
41
|
+
# MalformedCSVError is due to illegal quoting or unclosed quotes
|
42
|
+
# Try to add a quote at the end and resume processing
|
43
|
+
# Log the changes to report
|
44
|
+
temp_file.write("MalformedCSVError at line #{line_count}")
|
45
|
+
line = line.insert(-2, "\"")
|
30
46
|
line = CSV.parse_line(line, {:col_sep => delimiter})
|
31
47
|
end
|
32
48
|
line = replace_null_values(line)
|
49
|
+
processed_file.write(line.to_csv({:col_sep => delimiter, :converters => :numeric}))
|
33
50
|
end
|
51
|
+
line_count += 1
|
34
52
|
end
|
53
|
+
temp_file.close
|
54
|
+
processed_file.close
|
35
55
|
# Cleaned the file - Now analyze for datatypes
|
36
56
|
CsvImportAnalyzer::CsvDatatypeAnalysis.new(options).datatype_analysis
|
37
57
|
else
|
@@ -41,21 +61,30 @@ module CsvImportAnalyzer
|
|
41
61
|
|
42
62
|
private
|
43
63
|
|
64
|
+
###
|
65
|
+
# Hash of default values that would be merged with user passed in values
|
66
|
+
# returns [Hash] defaults
|
67
|
+
###
|
44
68
|
def defaults
|
45
69
|
{
|
46
|
-
:metadata_output => nil,
|
47
|
-
:processed_input => nil,
|
48
|
-
:unique => 10,
|
49
|
-
:check_bounds => true,
|
50
|
-
:datatype_analysis => 200,
|
51
|
-
:chunk =>
|
52
|
-
:database => [:pg, :mysql],
|
53
|
-
:quote_convert => true,
|
54
|
-
:replace_nulls => true,
|
55
|
-
:out_format => :json
|
70
|
+
:metadata_output => nil, # To be set if metadata needs to be printed to a file
|
71
|
+
:processed_input => nil, # To be set if processed input is needed
|
72
|
+
:unique => 10, # Threshold for number of defaults values that needs to identified
|
73
|
+
:check_bounds => true, # Option to check for min - max bounds for each column [true => find the bounds]
|
74
|
+
:datatype_analysis => 200, # Number of rows to be sampled for datatype analysis
|
75
|
+
:chunk => 200, # Chunk size (no of rows) that needs to processed in-memory [Important not to load entire file into memory]
|
76
|
+
:database => [:pg, :mysql], # Databases for which schema needs to be generated
|
77
|
+
:quote_convert => true, # Convert any single quotes to double quotes
|
78
|
+
:replace_nulls => true, # Replace nulls, empty's, nils, Null's with NULL
|
79
|
+
:out_format => :json # Set what type of output do you need as analysis
|
56
80
|
}
|
57
81
|
end
|
58
82
|
|
83
|
+
###
|
84
|
+
# Replaces single quotes with doubles in each line
|
85
|
+
# Escapes the double quotes if it's between two single quotes before
|
86
|
+
# returns [String] result
|
87
|
+
###
|
59
88
|
def replace_line_single_quotes(line, delimiter)
|
60
89
|
delimiter = "\\|" if delimiter == "|"
|
61
90
|
pattern = "#{delimiter}'.*?'#{delimiter}" # set the pattern to opening and closing single quote found between delimiters
|
@@ -82,5 +111,26 @@ module CsvImportAnalyzer
|
|
82
111
|
end
|
83
112
|
return line
|
84
113
|
end
|
114
|
+
|
115
|
+
###
|
116
|
+
# Uses ruby tempfile to create temp files for
|
117
|
+
# 1. Store processed file
|
118
|
+
# 2. Error reporting
|
119
|
+
# Returns the file handler for a temp file.
|
120
|
+
# This tempfile holds any modifications being done to the file.
|
121
|
+
###
|
122
|
+
def create_tempfiles(filename, options)
|
123
|
+
options[:original_filename] = filename
|
124
|
+
filename = File.basename(filename)
|
125
|
+
processed_filename = File.join(Dir.tmpdir, "processed_"+filename)
|
126
|
+
options[:filename] = processed_filename
|
127
|
+
# filename += Time.now.strftime("%Y%m%d%H%M%S")
|
128
|
+
# temp_file = Tempfile.new(filename)
|
129
|
+
# temp_file = File.open(File.join(Dir.tmpdir, filename), "w+")
|
130
|
+
temp_file = File.join(Dir.tmpdir, "error_report_"+filename)
|
131
|
+
temp_file = File.open(temp_file, "w+")
|
132
|
+
processed_file = File.open(processed_filename, "w+")
|
133
|
+
return temp_file, processed_file
|
134
|
+
end
|
85
135
|
end
|
86
136
|
end
|