universal_data_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/universal_data_parser.rb +97 -0
  3. metadata +58 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cfcb9e3692b604fd4f7177623a2567b9f0fc42b7
4
+ data.tar.gz: 385e774d517fca2c033ffa52dc6c72c0b157c749
5
+ SHA512:
6
+ metadata.gz: 20222da8bb9e7b99c6982aa57ee07ef71b99b0fb9e9ba97949c72d34d137b0726dfbc3eafb4c8689af4e24f93448ffa55eec83adc9e0958b201a6e175a4f9385
7
+ data.tar.gz: 6448d185b267186d81a7af96fd407981c4e51f079a51e728d27899acb034c5a61e2e4f0fbc2721233c12998aef00a8fe594b4735a1fb6971757690594ad0013e
@@ -0,0 +1,97 @@
1
+ # encoding: utf-8 #
2
+ require 'csv'
3
+ require 'charlock_holmes'
4
+ require "i18n"
5
+
6
+ I18n.enforce_available_locales = false
7
+ I18n.locale = :es
8
+
9
+ module UniversalDataParser
10
+
11
# In-memory representation of tabular data.
#
# Takes an array of rows (each row an array of cells, as produced by
# `CSV.parse`), picks a header row and exposes every row as a hash keyed by
# the normalized header names.
class DataSet
  # Never assigned by this class; kept for interface compatibility.
  attr_accessor :columns
  # Array of hashes (one per raw row) keyed by header symbols.
  attr_accessor :items
  # Normalized header names, or nil when no header row could be found.
  attr_accessor :headers
  # The rows exactly as they were handed to the constructor.
  attr_accessor :raw_data

  # @param raw_data [Array<Array<String, nil>>] rows of cells
  def initialize(raw_data)
    @raw_data = raw_data
    guess_columns
    generate_data
  end

  private

  # Builds `@items`: one hash per raw row, mapping each header (as a symbol)
  # to the cell at the same index. Cells beyond the row's length map to nil.
  #
  # NOTE(review): every raw row is converted, including the header row itself
  # and any rows before it — confirm whether those should be skipped.
  def generate_data
    # `Array(nil)` is `[]`, so a data set without headers yields empty hashes
    # instead of raising NoMethodError.
    keys = Array(headers).map(&:to_sym)
    @items = @raw_data.map do |row|
      keys.each_with_index.each_with_object({}) do |(key, index), item|
        item[key] = row[index]
      end
    end
  end

  # Picks the first non-empty row in which no cell is nil or empty and stores
  # its cells, normalized with `String#underscore`, as `@headers`.
  # Leaves `@headers` nil when no such row exists.
  def guess_columns
    header_row = @raw_data.find do |row|
      # An empty row (e.g. a blank CSV line) has no blank cells either, so it
      # must be excluded explicitly or it would become the header.
      !row.empty? && row.none? { |cell| cell.nil? || cell.empty? }
    end
    # Non-destructive `map` keeps the caller's raw data untouched.
    @headers = header_row.map(&:underscore) if header_row
  end
end
51
+
52
# Reads the given file, converts its contents to UTF-8, parses them as CSV
# and wraps the resulting rows in a DataSet.
#
# @param path the value handed on to `convert_file`
# @return [DataSet]
def self.parse_file(path)
  utf8_text = convert_file(path)
  rows = CSV.parse(utf8_text)
  DataSet.new(rows)
end
55
+
56
# Reads `file` and returns its contents converted to UTF-8.
#
# This helper stays public: the bare `private` that used to precede it never
# applied to `def self.` methods, so callers may already depend on it.
#
# @param file [String, #read] a filesystem path, or any object responding to
#   `read` (File, StringIO, an uploaded-file wrapper, ...)
# @return [String] the contents re-encoded as UTF-8 ("" for empty input)
# @raise [ArgumentError] when `file` is neither a path nor readable, or when
#   no encoding is reported for the contents
def self.convert_file(file)
  contents =
    if file.is_a?(String)
      File.read(file)
    elsif file.respond_to?(:read)
      # Duck typing avoids naming ActionDispatch::Http::UploadedFile, which
      # raised NameError whenever Rails was not loaded.
      file.read.to_s
    else
      raise ArgumentError,
            "expected a file path or an object responding to #read, got #{file.class}"
    end

  # Nothing to detect or convert in an empty document.
  return contents if contents.empty?

  detection = CharlockHolmes::EncodingDetector.detect(contents)
  encoding = detection && detection[:encoding]
  raise ArgumentError, 'could not detect the encoding of the input' unless encoding

  CharlockHolmes::Converter.convert(contents, encoding, 'UTF-8')
end
69
+
70
# Reopens the core String class to add `underscore`, which DataSet uses to
# normalize header cells.
#
# NOTE(review): this is a global monkey patch; ActiveSupport also defines
# String#underscore — confirm the two do not clash in Rails applications.
class ::String
  # Returns a lower-case, underscore-separated version of the string:
  # "::" becomes "/", CamelCase boundaries, dashes and spaces become "_",
  # the result is passed through `I18n.transliterate`, downcased, and every
  # "?" is removed.
  #
  # @return [String]
  def underscore
    separated = gsub(/::/, '/')
                .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
                .gsub(/([a-z\d])([A-Z])/, '\1_\2')
                .tr('- ', '_')
    # Presumably drops the "?" placeholder left for characters that could not
    # be transliterated; literal question marks are removed as well.
    I18n.transliterate(separated).downcase.delete('?')
  end
end
80
+
81
+ # def self.guess_column_types
82
+ # @columns.each_with_index do |column, i|
83
+ # column_type = 'text'
84
+ # @data.each do |row|
85
+ # case true
86
+ # when /^\d*$/.match(row[i])
87
+ # column_type = 'number'
88
+ # when /^[^@]*@.*$/.match(row[i])
89
+ # column_type = 'email'
90
+ # else
91
+ # column_type = 'text'
92
+ # end
93
+ # end
94
+ # puts column_type
95
+ # end
96
+ # end
97
+ end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: universal_data_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ricardo Garcia
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2010-12-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: charlock_holmes
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.7.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.7.3
27
+ description: Gem intended to analyze and deduce data from CSV, XLS, and XLSX files
28
+ email: ricardo@ukko.mx
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/universal_data_parser.rb
34
+ homepage: http://rubygems.org/gems/universal_data_parser
35
+ licenses:
36
+ - MIT
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 2.4.4
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: CSV data deduction gem
58
+ test_files: []