universal_data_parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/universal_data_parser.rb +97 -0
- metadata +58 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cfcb9e3692b604fd4f7177623a2567b9f0fc42b7
|
4
|
+
data.tar.gz: 385e774d517fca2c033ffa52dc6c72c0b157c749
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 20222da8bb9e7b99c6982aa57ee07ef71b99b0fb9e9ba97949c72d34d137b0726dfbc3eafb4c8689af4e24f93448ffa55eec83adc9e0958b201a6e175a4f9385
|
7
|
+
data.tar.gz: 6448d185b267186d81a7af96fd407981c4e51f079a51e728d27899acb034c5a61e2e4f0fbc2721233c12998aef00a8fe594b4735a1fb6971757690594ad0013e
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# encoding: utf-8 #
|
2
|
+
require 'csv'
|
3
|
+
require 'charlock_holmes'
|
4
|
+
require "i18n"
|
5
|
+
|
6
|
+
# Load-time, process-wide setting: turn off I18n's check that a locale is in
# its list of available locales. This affects every user of I18n in the
# process, not only this library.
I18n.enforce_available_locales = false
|
7
|
+
# Load-time side effect: switch the current I18n locale to Spanish.
# NOTE(review): presumably chosen so that I18n.transliterate (used by
# String#underscore in this file) applies Spanish rules -- confirm, since this
# also changes the locale seen by the host application.
I18n.locale = :es
|
8
|
+
|
9
|
+
module UniversalDataParser
|
10
|
+
|
11
|
+
# Tabular data set built from an array of rows, each row being an array of
# cells (the shape returned by CSV.parse).
#
# The first non-blank row whose cells are all non-nil and non-empty is taken
# as the header row. Header names are normalised with String#underscore and
# exposed through #headers. Every row *after* the header row becomes a Hash
# keyed by the symbolised header names and is exposed through #items.
#
# Attributes:
#   raw_data - the rows exactly as given to the constructor (never mutated).
#   headers  - Array of normalised header Strings, or nil when no header row
#              could be found.
#   items    - Array of Hashes ({ header_sym => cell }), one per data row;
#              empty when there is no header row.
#   columns  - not populated by this class; kept for interface compatibility.
class DataSet
  attr_accessor :columns, :items, :headers, :raw_data

  # raw_data - Array of rows (Arrays of String/nil cells).
  def initialize(raw_data)
    @raw_data = raw_data
    guess_columns
    generate_data
  end

  private

  # Builds @items from the rows that follow the header row.
  # Rows shorter than the header get nil for the missing cells; cells beyond
  # the last header are ignored.
  def generate_data
    # Without a header row there are no keys to build items from.
    if @headers.nil?
      @items = []
      return
    end

    keys = @headers.map(&:to_sym)
    data_rows = @raw_data.drop(@header_index + 1)
    @items = data_rows.map { |row| keys.zip(row).to_h }
  end

  # Locates the header row and stores its normalised names in @headers.
  # Leaves @headers nil when no row qualifies.
  def guess_columns
    @header_index = @raw_data.index { |row| header_row?(row) }
    return if @header_index.nil?

    # Non-mutating map: the caller's raw_data must stay untouched.
    @headers = @raw_data[@header_index].map(&:underscore)
  end

  # A header row has at least one cell and no nil/empty cells. Blank rows
  # (CSV.parse yields [] for blank lines) therefore never qualify.
  def header_row?(row)
    !row.empty? && row.none? { |cell| cell.nil? || cell.empty? }
  end
end
|
51
|
+
|
52
|
+
# Parses a CSV source into a DataSet.
#
# path - whatever convert_file accepts: a filesystem path (String) or a
#        readable file object.
#
# Returns a DataSet built from the CSV rows of the UTF-8 converted content.
def self.parse_file(path)
  utf8_text = convert_file(path)
  rows = CSV.parse(utf8_text)
  DataSet.new(rows)
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# Reads +file+ and returns its contents transcoded to UTF-8.
#
# file - a String path (read with File.read), or any object that responds to
#        #read (File, Tempfile, StringIO, Pathname, an uploaded-file object,
#        ...). Duck typing is used instead of exact class comparison so the
#        method works outside Rails, where ActionDispatch is not defined.
#        Any other value is treated as empty content.
#
# Returns the content as a UTF-8 String. When the detector cannot name a
# source encoding, the content is returned unchanged instead of raising.
# NOTE(review): the charlock_holmes README shows no :encoding key for binary
# content; whether empty content yields nil should be confirmed against it.
def self.convert_file(file)
  contents =
    if file.is_a?(String)
      File.read(file)
    elsif file.respond_to?(:read)
      file.read
    else
      ""
    end

  detection = CharlockHolmes::EncodingDetector.detect(contents)
  source_encoding = detection && detection[:encoding]
  # Nothing to convert from: hand the raw content back to the caller.
  return contents if source_encoding.nil?

  CharlockHolmes::Converter.convert contents, source_encoding, 'UTF-8'
end
|
69
|
+
|
70
|
+
# Reopens the core String class to add a header-normalising helper.
class ::String
  # Returns a lower-case, snake_case copy of the string:
  #   1. "::" becomes "/",
  #   2. an underscore is inserted at CamelCase word boundaries,
  #   3. "-" and " " become "_",
  #   4. the result is passed through I18n.transliterate and down-cased,
  #   5. every "?" is removed (presumably the placeholder I18n emits for
  #      characters it cannot transliterate -- confirm).
  def underscore
    snake = gsub(/::/, '/')
    snake = snake.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
    snake = snake.gsub(/([a-z\d])([A-Z])/, '\1_\2')
    snake = snake.tr("-", "_").tr(" ", "_")

    ascii = I18n.transliterate(snake)
    ascii.downcase.gsub("?", "")
  end
end
|
80
|
+
|
81
|
+
# def self.guess_column_types
|
82
|
+
# @columns.each_with_index do |column, i|
|
83
|
+
# column_type = 'text'
|
84
|
+
# @data.each do |row|
|
85
|
+
# case true
|
86
|
+
# when /^\d*$/.match(row[i])
|
87
|
+
# column_type = 'number'
|
88
|
+
# when /^[^@]*@.*$/.match(row[i])
|
89
|
+
# column_type = 'email'
|
90
|
+
# else
|
91
|
+
# column_type = 'text'
|
92
|
+
# end
|
93
|
+
# end
|
94
|
+
# puts column_type
|
95
|
+
# end
|
96
|
+
# end
|
97
|
+
end
|
metadata
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: universal_data_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ricardo Garcia
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2010-12-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: charlock_holmes
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.7.3
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.7.3
|
27
|
+
description: Gem intended to analize and deduct data from CSV, XLS, XLSX, files
|
28
|
+
email: ricardo@ukko.mx
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- lib/universal_data_parser.rb
|
34
|
+
homepage: http://rubygems.org/gems/universal_data_parser
|
35
|
+
licenses:
|
36
|
+
- MIT
|
37
|
+
metadata: {}
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
requirements: []
|
53
|
+
rubyforge_project:
|
54
|
+
rubygems_version: 2.4.4
|
55
|
+
signing_key:
|
56
|
+
specification_version: 4
|
57
|
+
summary: CSV data deducting gem
|
58
|
+
test_files: []
|