csv-import-analyzer 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +8 -1
- data/csv-import-analyzer.gemspec +1 -1
- data/lib/csv-import-analyzer.rb +6 -4
- data/lib/csv-import-analyzer/analyzer/csv_check_bounds.rb +30 -19
- data/lib/csv-import-analyzer/analyzer/delimiter_identifier.rb +44 -24
- data/lib/csv-import-analyzer/analyzer/file_type_assertion.rb +1 -5
- data/lib/csv-import-analyzer/csv_datatype_analysis.rb +25 -9
- data/lib/csv-import-analyzer/csv_sanitizer.rb +67 -17
- data/lib/csv-import-analyzer/export/metadata_analysis.rb +63 -7
- data/lib/csv-import-analyzer/helpers/common_functions.rb +4 -0
- data/lib/csv-import-analyzer/helpers/datatype_validation.rb +6 -6
- data/lib/csv-import-analyzer/helpers/string_class_extensions.rb +9 -3
- data/lib/csv-import-analyzer/query_builder/mysql_query_helper.rb +2 -2
- data/lib/csv-import-analyzer/query_builder/pg_query_helper.rb +1 -2
- data/lib/csv-import-analyzer/query_builder/query_helper.rb +2 -2
- data/lib/csv-import-analyzer/sql_query_builder.rb +27 -12
- data/lib/csv-import-analyzer/version.rb +1 -1
- data/spec/csv-import-analyzer/analyzer/csv_check_bounds_spec.rb +8 -8
- data/spec/csv-import-analyzer/analyzer/delimiter_identifier_spec.rb +13 -13
- data/spec/csv-import-analyzer/csv_sanitizer_spec.rb +10 -7
- data/spec/csv-import-analyzer/helpers/common_functions_spec.rb +20 -19
- data/spec/csv-import-analyzer/helpers/datatype_validation_spec.rb +28 -28
- data/spec/csv-import-analyzer/helpers/string_class_extension_spec.rb +6 -6
- data/spec/csv-import-analyzer/query_builder/mysql_query_helper_spec.rb +13 -13
- data/spec/csv-import-analyzer/query_builder/pg_query_helper_spec.rb +16 -16
- data/spec/csv-import-analyzer_spec.rb +3 -6
- data/spec/fixtures/sample.csv +2 -2
- data/spec/spec_helper.rb +3 -0
- metadata +17 -6
- data/lib/csv-import-analyzer/sampleTab.csv +0 -5
- data/samples/metadata_output.json +0 -70
- data/spec/csv-import-analyzer/csv_datatype_analysis_spec.rb +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7b62ce806e6c9ce5dbd5bb625cec61e9f62e861d
|
4
|
+
data.tar.gz: 097ec0f8a105b92a0adf7869f4e01d5330d62abc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0e7aff88f46560a7c263bee87f6840f2b918acd5003e0a3cd710a37910392d1452468d5fce3e8d0ee3aece44ab79f17c08da1aabab226ae70ef9bd58e9cf45c
|
7
|
+
data.tar.gz: 9778e7a00b0c972aaa45a6260a4a03b7ea699612c68740ab8acd10bbfc1369e4fc6f9e5c167a2b4936877c87c99a22df9965865d95cc760763541c6524f21a2f
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -34,10 +34,17 @@ Calling process on a filename would generate a metadata_output.json which has th
|
|
34
34
|
|
35
35
|
## TODO:
|
36
36
|
<ul>
|
37
|
+
<li> Handle control of processed input file to user </li>
|
37
38
|
<li> Return the analysis as Json object.</li>
|
38
|
-
<li> Structuring the analysis outputted to csv
|
39
|
+
<li> Better - Structuring the analysis outputted to csv</li>
|
40
|
+
<li> Add support to convert and import xlsx files to csv </li>
|
39
41
|
</ul>
|
40
42
|
|
43
|
+
## Additional Information
|
44
|
+
|
45
|
+
### Dependencies
|
46
|
+
<ul><li><a href="https://github.com/tilo/smarter_csv">smarter_csv</a> - For processing the csv in chunks</li></ul>
|
47
|
+
|
41
48
|
## Contributing
|
42
49
|
|
43
50
|
1. Fork it ( https://github.com/avinash-vllbh/csv-import-analyzer/fork )
|
data/csv-import-analyzer.gemspec
CHANGED
@@ -24,5 +24,5 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "rspec", "~> 3.0"
|
25
25
|
spec.add_development_dependency "simplecov", "~> 0.9"
|
26
26
|
|
27
|
-
spec.add_runtime_dependency "smarter_csv", "~> 1.0.17"
|
27
|
+
spec.add_runtime_dependency "smarter_csv", "~> 1.0", ">= 1.0.17"
|
28
28
|
end
|
data/lib/csv-import-analyzer.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
|
-
require 'pry'
|
2
1
|
require_relative "csv-import-analyzer/csv_sanitizer"
|
3
2
|
require_relative "csv-import-analyzer/helpers/errors"
|
4
3
|
module CsvImportAnalyzer
|
5
4
|
# To identify the methods in the module as class methods
|
6
5
|
extend self
|
7
6
|
|
7
|
+
###
|
8
|
+
# main public interface to the library
|
9
|
+
# makes sure that the file exists and
|
10
|
+
# passes the file and any additional options given to CsvSanitizer
|
11
|
+
# returns FileNotFound if given file is invalid
|
12
|
+
###
|
8
13
|
def process(filename, options = {})
|
9
14
|
if File::exist?(filename)
|
10
15
|
CsvImportAnalyzer::CsvSanitizer.new().process(File.absolute_path(filename), options)
|
@@ -13,6 +18,3 @@ module CsvImportAnalyzer
|
|
13
18
|
end
|
14
19
|
end
|
15
20
|
end
|
16
|
-
|
17
|
-
CsvImportAnalyzer.process("sampleTab.csv", {:metadata_output => true, :unique => 2})
|
18
|
-
# CsvImportAnalyzer.process("sampleTab.csv", {:metadata_output => true, :out_format => :csv})
|
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'smarter_csv'
|
2
|
-
require 'pry'
|
3
2
|
require_relative "../helpers/common_functions"
|
4
3
|
require_relative "../helpers/errors"
|
5
4
|
|
@@ -15,7 +14,6 @@ module CsvImportAnalyzer
|
|
15
14
|
@min_max_bounds = {}
|
16
15
|
@distinct_values = {}
|
17
16
|
@nullable = options[:nullable] || []
|
18
|
-
|
19
17
|
end
|
20
18
|
|
21
19
|
def filename
|
@@ -31,10 +29,14 @@ module CsvImportAnalyzer
|
|
31
29
|
@max_distinct_values ||= Integer(options[:unique]) + 1
|
32
30
|
end
|
33
31
|
|
34
|
-
# Public interface
|
32
|
+
# Public interface for CsvCheckBounds
|
33
|
+
# Processes the CSV file for min & max values and distinct values for each column
|
35
34
|
def get_min_max_values
|
36
35
|
unless filename.nil?
|
37
36
|
if File.exist?(filename)
|
37
|
+
# Using SmarterCSV gem to retrieve the csv records in chunks
|
38
|
+
# Chunk size can be set by the user
|
39
|
+
# E.g. :chunk_size => 200 would retrieve 200 rows each time
|
38
40
|
SmarterCSV.process(filename, {:col_sep => delimiter, :chunk_size => chunk_size,
|
39
41
|
:remove_empty_values => false, :remove_zero_values => false}) do |chunk|
|
40
42
|
chunk.each do |row|
|
@@ -59,9 +61,10 @@ module CsvImportAnalyzer
|
|
59
61
|
|
60
62
|
private
|
61
63
|
|
62
|
-
|
63
|
-
#If the key is of String type then we find the max length of it
|
64
|
-
|
64
|
+
###
|
65
|
+
# If the key is of String type then we find the max length of it
|
66
|
+
# Any other datatype would have a min and max ranges
|
67
|
+
###
|
65
68
|
def process_min_max_for_column(key, value)
|
66
69
|
if min_max_bounds[key].nil?
|
67
70
|
unless csv_column_datatypes[key] == :string
|
@@ -73,22 +76,30 @@ module CsvImportAnalyzer
|
|
73
76
|
add_bounds(key, value)
|
74
77
|
end
|
75
78
|
|
76
|
-
|
77
|
-
#Method
|
78
|
-
|
79
|
+
###
|
80
|
+
# Method to decide on the min max values for each key
|
81
|
+
# Checks for length if key is of String format
|
82
|
+
# Check for values if key is of Numeric or Datetime format
|
83
|
+
###
|
79
84
|
def add_bounds(key, value)
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
85
|
+
begin
|
86
|
+
if csv_column_datatypes[key] == :string
|
87
|
+
min_max_bounds[key][:min] = value.length if value.length < min_max_bounds[key][:min]
|
88
|
+
min_max_bounds[key][:max] = value.length if value.length > min_max_bounds[key][:max]
|
89
|
+
else
|
90
|
+
min_max_bounds[key][:min] = value if value < min_max_bounds[key][:min]
|
91
|
+
min_max_bounds[key][:max] = value if value > min_max_bounds[key][:max]
|
92
|
+
end
|
93
|
+
rescue ArgumentError, NoMethodError => e
|
94
|
+
###
|
95
|
+
# TODO: Handle csv parse conversions of datatypes
|
96
|
+
###
|
86
97
|
end
|
87
98
|
end
|
88
99
|
|
89
|
-
|
90
|
-
#Processes the max number of distinct values set for each column
|
91
|
-
|
100
|
+
###
|
101
|
+
# Processes the max number of distinct values set for each column
|
102
|
+
###
|
92
103
|
def process_distinct_values(key, value)
|
93
104
|
if distinct_values[key].nil?
|
94
105
|
distinct_values[key] = [value]
|
@@ -101,4 +112,4 @@ module CsvImportAnalyzer
|
|
101
112
|
end
|
102
113
|
|
103
114
|
end
|
104
|
-
end
|
115
|
+
end
|
@@ -1,39 +1,31 @@
|
|
1
1
|
require_relative "../helpers/string_class_extensions"
|
2
2
|
require 'pry'
|
3
|
-
|
4
3
|
module CsvImportAnalyzer
|
5
4
|
module DelimiterIdentifier
|
6
5
|
|
7
|
-
|
8
|
-
|
6
|
+
###
|
7
|
+
# Types of delimiters that the gem has to lookout for.
|
8
|
+
# Could be changed in future or to custom delimiters
|
9
|
+
# returns a @delimiter instance variable array
|
10
|
+
###
|
9
11
|
def delimiter
|
10
12
|
@delimiter ||= [",", ";", "\t", "|"]
|
11
13
|
end
|
12
14
|
|
15
|
+
###
|
16
|
+
# Routine to initialize the delimiter_count hash with the delimiters defined above with a base count of 0
|
17
|
+
# Returns @delimiter_count instance variable
|
18
|
+
###
|
13
19
|
def delimiter_count
|
14
20
|
@delimiter_count ||= Hash[delimiter.map {|v| [v,0]}]
|
15
21
|
@delimiter_count
|
16
22
|
end
|
17
23
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
def count_occurances_delimiter(line)
|
24
|
-
delimiter_count.keys.each do |key|
|
25
|
-
#Count the occurances of delimiter in a line
|
26
|
-
total_count_delimiter = line.substr_count(key)
|
27
|
-
#count the occurances of delimiter between quotes inside a line to disregard them
|
28
|
-
quoted_delimiter_count = getting_contents_of_quoted_values(line).substr_count(key)
|
29
|
-
delimiter_count[key] += total_count_delimiter - quoted_delimiter_count
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
def return_plausible_delimiter
|
34
|
-
return delimiter_count.key(delimiter_count.values.max)
|
35
|
-
end
|
36
|
-
|
24
|
+
###
|
25
|
+
# Method to analyze input data and determine delimiter
|
26
|
+
# Input can be either a csv file or even an array of strings
|
27
|
+
# returns delimiter
|
28
|
+
###
|
37
29
|
def identify_delimiter(filename_or_sample)
|
38
30
|
#filename_or_sample input can be either a File or an Array or a string - Return delimiter for File or an Array of strings (if found)
|
39
31
|
if filename_or_sample.class == String
|
@@ -60,7 +52,35 @@ module CsvImportAnalyzer
|
|
60
52
|
InvalidInput.new
|
61
53
|
end
|
62
54
|
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
def getting_contents_of_quoted_values(input)
|
59
|
+
#return a join of all the strings inside quotes inside a line
|
60
|
+
input.scan(/".*?"/).join
|
61
|
+
end
|
62
|
+
|
63
|
+
###
|
64
|
+
# Find the count of delimiter occurrences in a line
|
65
|
+
# CSV files can have delimiters escaped between quotes
|
66
|
+
# valid count = total_count - delimiters inside quotes
|
67
|
+
###
|
68
|
+
def count_occurances_delimiter(line)
|
69
|
+
delimiter_count.keys.each do |key|
|
70
|
+
#Count the occurances of delimiter in a line
|
71
|
+
total_count_delimiter = line.substr_count(key)
|
72
|
+
#count the occurances of delimiter between quotes inside a line to disregard them
|
73
|
+
quoted_delimiter_count = getting_contents_of_quoted_values(line).substr_count(key)
|
74
|
+
delimiter_count[key] += total_count_delimiter - quoted_delimiter_count
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
###
|
79
|
+
# The plausible delimiter is the one with the most occurrences across the set of rows
|
80
|
+
###
|
81
|
+
def return_plausible_delimiter
|
82
|
+
return delimiter_count.key(delimiter_count.values.max)
|
83
|
+
end
|
84
|
+
|
63
85
|
end
|
64
86
|
end
|
65
|
-
|
66
|
-
# puts CsvImportAnalyzer::DelimiterIdentifier.identify_delimiter("/home/avinash/Desktop/csv-import-analyzer/spec/fixtures/sample.csv")
|
@@ -1,10 +1,10 @@
|
|
1
1
|
require "smarter_csv"
|
2
2
|
require "tempfile"
|
3
|
-
require "pry"
|
4
3
|
require_relative "helpers/datatype_validation"
|
5
4
|
require_relative "analyzer/csv_check_bounds"
|
6
5
|
require_relative "helpers/common_functions"
|
7
6
|
require_relative "sql_query_builder"
|
7
|
+
require "pry"
|
8
8
|
|
9
9
|
module CsvImportAnalyzer
|
10
10
|
class CsvDatatypeAnalysis
|
@@ -27,8 +27,12 @@ module CsvImportAnalyzer
|
|
27
27
|
@options[:filename]
|
28
28
|
end
|
29
29
|
|
30
|
-
|
30
|
+
###
|
31
31
|
# Process a chunk of csv file for all possible datatypes towards each column in the row
|
32
|
+
# This datatype analysis is used for analyzing,
|
33
|
+
# Min - Max values of each column
|
34
|
+
# Distinct values of each column
|
35
|
+
# Enumeration eligibility
|
32
36
|
def datatype_analysis
|
33
37
|
SmarterCSV.process(filename, {:col_sep => delimiter, :chunk_size => chunk_size,
|
34
38
|
:remove_empty_values => false, :remove_zero_values => false}) do |chunk|
|
@@ -61,14 +65,18 @@ module CsvImportAnalyzer
|
|
61
65
|
return options[:chunk]
|
62
66
|
end
|
63
67
|
|
64
|
-
|
65
|
-
#
|
66
|
-
#
|
68
|
+
###
|
69
|
+
# Call DatatypeValidator in helper module to process the possible datatype for the value
|
70
|
+
# Is this the right way to hide dependency on the external classes or objects
|
71
|
+
# May be a static would do ? Should I create an object and call method on the object each time rather than instantiate a new object each time ??
|
72
|
+
###
|
67
73
|
def determine_dataype(value)
|
68
74
|
return validate_field(value)
|
69
75
|
end
|
70
76
|
|
77
|
+
###
|
71
78
|
# Build the hash of hashes which hold the count of different possible datatypes for each row
|
79
|
+
###
|
72
80
|
def add_to_datatype(key, datatype)
|
73
81
|
if csv_column_datatypes[key].nil?
|
74
82
|
csv_column_datatypes[key] = {datatype => 1}
|
@@ -81,8 +89,11 @@ module CsvImportAnalyzer
|
|
81
89
|
end
|
82
90
|
end
|
83
91
|
|
84
|
-
|
85
|
-
#
|
92
|
+
###
|
93
|
+
# Finalize the datatype for each column.
|
94
|
+
# A column datatype would be set to varchar or string if even one of it's values tend to be string
|
95
|
+
# If the column doesn't have any possible strings then assign the column the datatype with the maximum count of identified possibilities
|
96
|
+
###
|
86
97
|
def finalize_datatypes_for_csv
|
87
98
|
csv_column_datatypes.map { |column_name, possible_datatypes|
|
88
99
|
#If there is string type even atleast 1 there is no other option but to set the datatype to string => varchar
|
@@ -95,7 +106,12 @@ module CsvImportAnalyzer
|
|
95
106
|
}
|
96
107
|
end
|
97
108
|
|
98
|
-
|
109
|
+
###
|
110
|
+
# Decide if simple datatype analysis is enough or proceed further
|
111
|
+
# Proceed further would be to
|
112
|
+
# Identify min and max bounds for each column
|
113
|
+
# Identify if the number distinct values are less than set threshold
|
114
|
+
###
|
99
115
|
def take_further_actions
|
100
116
|
if options[:check_bounds]
|
101
117
|
min_max_bounds = CsvImportAnalyzer::CsvCheckBounds.new(options)
|
@@ -107,4 +123,4 @@ module CsvImportAnalyzer
|
|
107
123
|
query.generate_query
|
108
124
|
end
|
109
125
|
end
|
110
|
-
end
|
126
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require "smarter_csv"
|
2
2
|
require "tempfile"
|
3
|
+
require "pry"
|
3
4
|
require_relative "analyzer/delimiter_identifier"
|
4
5
|
require_relative "helpers/string_class_extensions"
|
5
6
|
require_relative "helpers/common_functions"
|
@@ -11,27 +12,46 @@ module CsvImportAnalyzer
|
|
11
12
|
include CsvImportAnalyzer::Helper
|
12
13
|
include CsvImportAnalyzer::DelimiterIdentifier
|
13
14
|
|
15
|
+
###
|
16
|
+
# Public interface for the entire library
|
17
|
+
# What it does?
|
18
|
+
# Sets "options" variable by merging default values and passed values
|
19
|
+
# Finds the delimiter by analyzing a sample
|
20
|
+
# Sanitizes or preprocesses the csv file by creating a temporary processed file
|
21
|
+
# Replacing null and empty values with NULL
|
22
|
+
# Replace single quotes with double quotes if needed
|
23
|
+
# Handle CSVMalformedError by logging the error to error report
|
24
|
+
# Passes the options to DatatypeAnalysis
|
25
|
+
###
|
14
26
|
def process(filename, options)
|
15
|
-
|
16
27
|
options = defaults.merge(options)
|
17
28
|
if File.exist?(filename)
|
18
|
-
options[:filename] = filename
|
19
|
-
#first thing to do - find the delimiter of the file.
|
20
29
|
delimiter = identify_delimiter(filename)
|
21
30
|
options[:delimiter] = delimiter
|
31
|
+
# create tempfiles to update any changes being made
|
32
|
+
temp_file, processed_file = create_tempfiles(filename, options)
|
33
|
+
options[:temp_file] = temp_file.path
|
34
|
+
line_count = 1
|
22
35
|
File.foreach(filename) do |line|
|
23
|
-
#Check if the line is empty - no point in processing empty lines
|
24
|
-
if line.length > 1
|
36
|
+
if line.length > 1 #Check if the line is empty - no point in processing empty lines
|
25
37
|
line = replace_line_single_quotes(line,delimiter)
|
26
38
|
begin
|
27
39
|
line = CSV.parse_line(line, {:col_sep => delimiter})
|
28
|
-
rescue CSV::MalformedCSVError
|
29
|
-
|
40
|
+
rescue CSV::MalformedCSVError
|
41
|
+
# MalformedCSVError is due to illegal quoting or unclosed quotes
|
42
|
+
# Try to add a quote at the end and resume processing
|
43
|
+
# Log the changes to report
|
44
|
+
temp_file.write("MalformedCSVError at line #{line_count}")
|
45
|
+
line = line.insert(-2, "\"")
|
30
46
|
line = CSV.parse_line(line, {:col_sep => delimiter})
|
31
47
|
end
|
32
48
|
line = replace_null_values(line)
|
49
|
+
processed_file.write(line.to_csv({:col_sep => delimiter, :converters => :numeric}))
|
33
50
|
end
|
51
|
+
line_count += 1
|
34
52
|
end
|
53
|
+
temp_file.close
|
54
|
+
processed_file.close
|
35
55
|
# Cleaned the file - Now analyze for datatypes
|
36
56
|
CsvImportAnalyzer::CsvDatatypeAnalysis.new(options).datatype_analysis
|
37
57
|
else
|
@@ -41,21 +61,30 @@ module CsvImportAnalyzer
|
|
41
61
|
|
42
62
|
private
|
43
63
|
|
64
|
+
###
|
65
|
+
# Hash of default values that would be merged with user passed in values
|
66
|
+
# returns [Hash] defaults
|
67
|
+
###
|
44
68
|
def defaults
|
45
69
|
{
|
46
|
-
:metadata_output => nil,
|
47
|
-
:processed_input => nil,
|
48
|
-
:unique => 10,
|
49
|
-
:check_bounds => true,
|
50
|
-
:datatype_analysis => 200,
|
51
|
-
:chunk =>
|
52
|
-
:database => [:pg, :mysql],
|
53
|
-
:quote_convert => true,
|
54
|
-
:replace_nulls => true,
|
55
|
-
:out_format => :json
|
70
|
+
:metadata_output => nil, # To be set if metadata needs to be printed to a file
|
71
|
+
:processed_input => nil, # To be set if processed input is needed
|
72
|
+
:unique => 10, # Threshold for the number of distinct values that need to be identified
|
73
|
+
:check_bounds => true, # Option to check for min - max bounds for each column [true => find the bounds]
|
74
|
+
:datatype_analysis => 200, # Number of rows to be sampled for datatype analysis
|
75
|
+
:chunk => 200, # Chunk size (no of rows) that needs to processed in-memory [Important not to load entire file into memory]
|
76
|
+
:database => [:pg, :mysql], # Databases for which schema needs to be generated
|
77
|
+
:quote_convert => true, # Convert any single quotes to double quotes
|
78
|
+
:replace_nulls => true, # Replace nulls, empty's, nils, Null's with NULL
|
79
|
+
:out_format => :json # Set what type of output do you need as analysis
|
56
80
|
}
|
57
81
|
end
|
58
82
|
|
83
|
+
###
|
84
|
+
# Replaces single quotes with doubles in each line
|
85
|
+
# Escapes the double quotes if it's between two single quotes before
|
86
|
+
# returns [String] result
|
87
|
+
###
|
59
88
|
def replace_line_single_quotes(line, delimiter)
|
60
89
|
delimiter = "\\|" if delimiter == "|"
|
61
90
|
pattern = "#{delimiter}'.*?'#{delimiter}" # set the pattern to opening and closing single quote found between delimiters
|
@@ -82,5 +111,26 @@ module CsvImportAnalyzer
|
|
82
111
|
end
|
83
112
|
return line
|
84
113
|
end
|
114
|
+
|
115
|
+
###
|
116
|
+
# Uses ruby tempfile to create temp files for
|
117
|
+
# 1. Store processed file
|
118
|
+
# 2. Error reporting
|
119
|
+
# Returns the file handler for a temp file.
|
120
|
+
# This tempfile holds any modifications being done to the file.
|
121
|
+
###
|
122
|
+
def create_tempfiles(filename, options)
|
123
|
+
options[:original_filename] = filename
|
124
|
+
filename = File.basename(filename)
|
125
|
+
processed_filename = File.join(Dir.tmpdir, "processed_"+filename)
|
126
|
+
options[:filename] = processed_filename
|
127
|
+
# filename += Time.now.strftime("%Y%m%d%H%M%S")
|
128
|
+
# temp_file = Tempfile.new(filename)
|
129
|
+
# temp_file = File.open(File.join(Dir.tmpdir, filename), "w+")
|
130
|
+
temp_file = File.join(Dir.tmpdir, "error_report_"+filename)
|
131
|
+
temp_file = File.open(temp_file, "w+")
|
132
|
+
processed_file = File.open(processed_filename, "w+")
|
133
|
+
return temp_file, processed_file
|
134
|
+
end
|
85
135
|
end
|
86
136
|
end
|