universal_data_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/universal_data_parser.rb +97 -0
  3. metadata +58 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cfcb9e3692b604fd4f7177623a2567b9f0fc42b7
4
+ data.tar.gz: 385e774d517fca2c033ffa52dc6c72c0b157c749
5
+ SHA512:
6
+ metadata.gz: 20222da8bb9e7b99c6982aa57ee07ef71b99b0fb9e9ba97949c72d34d137b0726dfbc3eafb4c8689af4e24f93448ffa55eec83adc9e0958b201a6e175a4f9385
7
+ data.tar.gz: 6448d185b267186d81a7af96fd407981c4e51f079a51e728d27899acb034c5a61e2e4f0fbc2721233c12998aef00a8fe594b4735a1fb6971757690594ad0013e
data/lib/universal_data_parser.rb ADDED
@@ -0,0 +1,97 @@
1
+ # encoding: utf-8 #
2
+ require 'csv'
3
+ require 'charlock_holmes'
4
+ require "i18n"
5
+
6
+ I18n.enforce_available_locales = false
7
+ I18n.locale = :es
8
+
9
+ module UniversalDataParser
10
+
11
# Tabular data set built from an array of rows, where each row is an array
# of cell values (for example the result of CSV.parse).
#
# On construction the first non-empty row in which no cell is nil or an empty
# string is taken as the header row. Its cells are normalised with
# String#underscore and stored in +headers+. Every row after the header row
# becomes a Hash in +items+, keyed by the symbolised header.
#
# +columns+ is exposed for callers but is never assigned by this class.
class DataSet
  attr_accessor :columns
  attr_accessor :items
  attr_accessor :headers
  attr_accessor :raw_data

  # @param raw_data [Array<Array>] rows of cells; it is stored as given and
  #   is not modified.
  def initialize(raw_data)
    @raw_data = raw_data
    guess_columns
    generate_data
  end

  private

  # Builds +@items+ from the rows that follow the header row.
  #
  # When no header row was found +@items+ is an empty array. Cells missing
  # from a short row are nil, and duplicate headers keep the last value.
  def generate_data
    return @items = [] if @header_index.nil?

    @items = @raw_data.drop(@header_index + 1).map do |row|
      headers.each_with_index.each_with_object({}) do |(header, position), item|
        item[header.to_sym] = row[position]
      end
    end
  end

  # Locates the header row and stores its normalised names in +@headers+.
  #
  # +@headers+ stays nil when no row qualifies. The header names are built
  # with +map+ (not +map!+) so the caller's row is left untouched.
  def guess_columns
    @header_index = @raw_data.index do |row|
      # A blank line parses to an empty row; it cannot be a header row.
      !row.empty? && row.none? { |cell| cell.nil? || cell.to_s.empty? }
    end
    return if @header_index.nil?

    @headers = @raw_data[@header_index].map { |cell| cell.to_s.underscore }
  end
end
51
+
52
# Builds a DataSet from a CSV source.
#
# The argument is handed to convert_file as-is, the UTF-8 text it returns is
# parsed with CSV.parse, and the resulting rows are wrapped in a DataSet.
def self.parse_file(path)
  utf8_text = convert_file(path)
  rows = CSV.parse(utf8_text)
  DataSet.new(rows)
end
55
+
56
+ private
57
+
58
# Reads +file+ and returns its contents converted to UTF-8.
#
# The source encoding is detected with CharlockHolmes::EncodingDetector and
# the text is converted with CharlockHolmes::Converter.
#
# Because it is defined with +def self.+, a bare +private+ keyword does not
# apply to it and it remains publicly callable.
#
# @param file [String, #read] a filesystem path, or any object that responds
#   to +read+ (File, Tempfile, StringIO, an uploaded-file wrapper, ...).
# @return [String] the contents in UTF-8; empty input is returned unchanged.
# @raise [ArgumentError] if +file+ is neither a path nor readable, or if no
#   encoding could be detected for non-empty contents.
def self.convert_file(file)
  contents =
    if file.is_a?(String)
      File.read(file)
    elsif file.respond_to?(:read)
      # Duck typing instead of naming ActionDispatch::Http::UploadedFile:
      # referencing that constant raises NameError when Rails is not loaded.
      file.read
    else
      raise ArgumentError,
            "expected a path or an object responding to #read, got #{file.class}"
    end

  contents = contents.to_s
  # Nothing to detect or convert in an empty source.
  return contents if contents.empty?

  detection = CharlockHolmes::EncodingDetector.detect(contents)
  encoding = detection && detection[:encoding]
  raise ArgumentError, 'could not detect the encoding of the given contents' if encoding.nil?

  CharlockHolmes::Converter.convert(contents, encoding, 'UTF-8')
end
69
+
70
# Reopens the top-level String class, so every String in the process gains
# (or has replaced) an +underscore+ method once this file is loaded.
# NOTE(review): ActiveSupport defines a String#underscore with different
# behaviour; whichever is loaded last wins. Confirm load order in Rails apps.
class ::String
  # Returns a lower-case, snake_case version of the string suitable for use
  # as a column key.
  #
  # Steps, in order: "::" becomes "/", an underscore is inserted at camel-case
  # boundaries, "-" and " " become "_", the result is passed through
  # I18n.transliterate, lower-cased, and every "?" is removed. That last step
  # also strips literal question marks from the input, and presumably exists
  # to drop the "?" placeholder I18n uses for characters it cannot
  # transliterate (verify against the I18n version in use).
  #
  # @return [String] a new string; the receiver is not modified.
  def underscore
    snake = gsub(/::/, '/')
            .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
            .gsub(/([a-z\d])([A-Z])/, '\1_\2')
            .tr('- ', '_')

    I18n.transliterate(snake).downcase.delete('?')
  end
end
80
+
81
+ # def self.guess_column_types
82
+ # @columns.each_with_index do |column, i|
83
+ # column_type = 'text'
84
+ # @data.each do |row|
85
+ # case true
86
+ # when /^\d*$/.match(row[i])
87
+ # column_type = 'number'
88
+ # when /^[^@]*@.*$/.match(row[i])
89
+ # column_type = 'email'
90
+ # else
91
+ # column_type = 'text'
92
+ # end
93
+ # end
94
+ # puts column_type
95
+ # end
96
+ # end
97
+ end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: universal_data_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ricardo Garcia
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2010-12-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: charlock_holmes
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.7.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.7.3
27
+ description: Gem intended to analize and deduct data from CSV, XLS, XLSX, files
28
+ email: ricardo@ukko.mx
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/universal_data_parser.rb
34
+ homepage: http://rubygems.org/gems/universal_data_parser
35
+ licenses:
36
+ - MIT
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 2.4.4
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: CSV data deducting gem
58
+ test_files: []