large_csv_reader 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 12f98dccfdfdc9e1c4b9cfee61c45fabc328338707d3ea39bda7ad98935d9daf
4
+ data.tar.gz: 40ca5ad5f66d00b6ab02c7236c03cdb5d53889c32687415cf3e321e74a2a6144
5
+ SHA512:
6
+ metadata.gz: d587014c2ed754c4c82b557066bef98b41411a996c8e365112dab428559749eb2080ae15f5cb405738c915d38e08bf3b362821a33d7cc0847748b4a23742b126
7
+ data.tar.gz: 77c821fde2268965a976f88c4ecb99e55b7466f4626120cf65afddab81585d5654cb7b4e97fd1973c2008fe9c57f30353b24ee57d4b711ede74e00180ed756e3
data/README.md ADDED
@@ -0,0 +1,32 @@
1
+ # Large CSV Reader GEM
2
+
3
+ This **gem** was created to help with a known issue: processing large **CSV files** using **Ruby** without exhausting **RAM**. I used lazy enumeration to achieve this, which allows the methods to execute actions on one line at a time instead of loading millions of lines into memory at once.
4
+
5
+ I consider this **gem** to be in a **beta state**; a proper testing suite is still missing, and there are some improvements and extensions that would be useful to have in a full version of it.
6
+
7
+
8
+ # Installation
9
+ `gem install large_csv_reader`
10
+ - In your files:
11
+ `require 'large_csv_reader'`
12
+
13
+ # Usage
14
+
15
+ ## General Methods
16
+ |Function|Description |
17
+ |--|--|
18
+ |`reader = LargeCsvReader.new`| creates a new instance of the reader|
19
+ |`reader.generate_csv(fileName, columnNames)`|creates a new csv file with the name and header names passed as parameters|
20
+ |`reader.append_to_csv(filename, rows=1000000, rowStructure)`|Adds lines to the CSV; these lines are generated from the rowStructure array parameter. If the rows parameter is not given, it loads 1 million lines into the file by default.|
21
+ |`reader.massive_read_in_csv_data (file_name)`|lazy load of each csv row into a list|
22
+ |`reader.massive_csv_builder(filename, column_names, rowStructure, rowMult="1")`|creates a csv with millions of lines; the value of rowMult represents how many million lines the file will have|
23
+ |`reader.row_generator(structure)`|generate rows on demand using enumeration|
24
+
25
+ ## Specific Methods
26
+ The **rest** of the methods are tailored to solving a **test problem** with **book data**: "**Date**", "**ISBN**", "**Price**"
27
+
28
+ - `massive_total_value_in_stock(csv_file_name)`
29
+ - `massive_number_of_each_isbn(csv_file_name)`
30
+ - `append_book_to_csv(filename,rows=1000000)`
31
+ - `book_generator`
32
+
@@ -0,0 +1,12 @@
1
# Gem packaging manifest for large_csv_reader.
# Fix: "proccesor" -> "processor" in the user-visible summary/description.
Gem::Specification.new do |s|
  s.name        = 'large_csv_reader'
  s.version     = '1.0.1'
  s.summary     = 'Large CSV builder and processor'
  s.description = 'Large CSV builder and processor, it can help with the creation of large csv files, reading those files and making operations over the csv data'
  s.authors     = ['Arturo Castro']
  s.email       = 'agcastro1994@gmail.com'
  # Everything shipped in the gem; keep in sync with the lib/ tree.
  s.files       = ['lib/large_csv_reader.rb',
                   'lib/large_csv_reader/book_in_stock.rb',
                   'large_csv_reader.gemspec',
                   'README.md']
  # NOTE(review): no license declared — consider choosing one (e.g. MIT).
  s.license     = ''
  s.add_dependency 'csv', '~> 1.0'
  s.add_dependency 'date', '~> 1.0'
end
@@ -0,0 +1,8 @@
1
# A single book record: an ISBN (fixed at creation) and a mutable price.
class BookInStock
  attr_reader :isbn
  attr_accessor :price

  # isbn  - identifier string; read-only after construction.
  # price - any value Float() accepts (String, Numeric, ...); stored as a
  #         Float. Float() raises ArgumentError on unparseable input.
  def initialize(isbn, price)
    @isbn  = isbn
    @price = Float(price)
  end
end
@@ -0,0 +1,137 @@
1
+ require 'csv'
2
+ require 'date'
3
+ require 'large_csv_reader/book_in_stock'
4
+
5
# Builds and processes very large CSV files with constant memory use.
# Row production goes through infinite Enumerators consumed lazily
# (one row at a time), and file reads go through CSV.foreach, so no
# full file is ever held in RAM.
#
# Cleanup vs. previous version:
# - `lazy.select { |x| csv << x }.first(rows)` relied on the truthy return
#   of `csv <<` to smuggle a side effect through `select`; replaced with the
#   intent-revealing (and equally lazy) `lazy.take(rows).each`.
# - Dropped the redundant `each_slice(1)` wrapping when streaming rows.
# - snake_case local/parameter names; Hash#fetch instead of a manual branch.
# - Removed the large block of commented-out legacy (non-lazy) methods.
class LargeCsvReader
  # Accumulates BookInStock objects loaded by massive_read_in_csv_data.
  attr_accessor :books_in_stock

  def initialize
    @books_in_stock = []
  end

  # Writes a header then (row_multiplicator * 1_000_000) copies of
  # row_structure to filename. row_multiplicator is a String for
  # backward compatibility with existing callers.
  def massive_csv_builder(filename, column_names, row_structure, row_multiplicator = "1")
    generate_csv(filename, column_names)
    row_multiplicator.to_i.times { append_to_csv(filename, row_structure) }
  end

  # Same as massive_csv_builder, but each row is a generated sample book.
  def massive_book_csv_builder(filename, column_names, row_multiplicator = "1")
    generate_csv(filename, column_names)
    row_multiplicator.to_i.times { append_book_to_csv(filename) }
  end

  # Appends column_names as one CSV row. Opens in append mode ("ab"), so an
  # existing file keeps its contents — NOTE(review): calling this twice on
  # the same file writes two header rows; confirm that is intended.
  def generate_csv(filename, column_names)
    CSV.open(filename, "ab") { |csv| csv << column_names }
  end

  # Appends `rows` generated book lines (fixed date, ISBN, price) to filename.
  def append_book_to_csv(filename, rows = 1000000)
    CSV.open(filename, "ab") do |csv|
      book_generator.lazy.take(rows).each do |book|
        csv << [Date.new(2001, 2, 3), book.isbn, book.price]
      end
    end
  end

  # Appends `rows` copies of row_structure to filename, lazily (constant
  # memory regardless of `rows`). The optional-before-required parameter
  # order is kept for compatibility: both append_to_csv(file, structure)
  # and append_to_csv(file, n, structure) work.
  def append_to_csv(filename, rows = 1000000, row_structure)
    CSV.open(filename, "ab") do |csv|
      row_generator(row_structure).lazy.take(rows).each { |row| csv << row }
    end
  end

  # Streams every data row of csv_file_name into @books_in_stock as
  # BookInStock objects (column 1 = ISBN, column 2 = Price).
  def massive_read_in_csv_data(csv_file_name)
    CSV.foreach(csv_file_name, headers: true) do |row|
      @books_in_stock << BookInStock.new(row[1], row[2])
    end
  end

  # Sums the Price column (index 2) across the whole file, one row at a time.
  def massive_total_value_in_stock(csv_file_name)
    CSV.foreach(csv_file_name, headers: true).inject(0) { |sum, row| sum + row[2].to_f }
  end

  # Returns a Hash mapping each ISBN (column 1) to its occurrence count.
  def massive_number_of_each_isbn(csv_file_name)
    counter_hash = {}
    CSV.foreach(csv_file_name, headers: true) do |row|
      number_of_each_isbn(counter_hash, [row])
    end
    counter_hash
  end

  # Increments the counter for the ISBN found at book_info[0][1] and returns
  # counter_hash. book_info is a one-element array wrapping a row — the
  # historical shape produced by each_slice(1), preserved for callers.
  def number_of_each_isbn(counter_hash, book_info)
    isbn = book_info[0][1]
    counter_hash[isbn] = counter_hash.fetch(isbn, 0) + 1
    counter_hash
  end

  # Infinite stream repeatedly yielding one fixed sample book; consume with
  # .lazy / .take / .first, never with plain .each.
  def book_generator
    Enumerator.new do |yielder|
      sample_book = BookInStock.new("978-1-9343561-0-4", 20.05)
      loop { yielder.yield sample_book }
    end
  end

  # Infinite stream repeatedly yielding the same `structure` array; consume
  # with .lazy / .take / .first, never with plain .each.
  def row_generator(structure)
    Enumerator.new do |yielder|
      loop { yielder.yield structure }
    end
  end
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: large_csv_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Arturo Castro
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-05-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: date
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ description: Large CSV builder and processor, it can help with creation of large csv
42
+ files, reading those files and making operations over the csv data
43
+ email: agcastro1994@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - README.md
49
+ - large_csv_reader.gemspec
50
+ - lib/large_csv_reader.rb
51
+ - lib/large_csv_reader/book_in_stock.rb
52
+ homepage:
53
+ licenses:
54
+ - ''
55
+ metadata: {}
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubygems_version: 3.3.14
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: Large CSV builder and processor
75
+ test_files: []