large_csv_reader 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 12f98dccfdfdc9e1c4b9cfee61c45fabc328338707d3ea39bda7ad98935d9daf
4
+ data.tar.gz: 40ca5ad5f66d00b6ab02c7236c03cdb5d53889c32687415cf3e321e74a2a6144
5
+ SHA512:
6
+ metadata.gz: d587014c2ed754c4c82b557066bef98b41411a996c8e365112dab428559749eb2080ae15f5cb405738c915d38e08bf3b362821a33d7cc0847748b4a23742b126
7
+ data.tar.gz: 77c821fde2268965a976f88c4ecb99e55b7466f4626120cf65afddab81585d5654cb7b4e97fd1973c2008fe9c57f30353b24ee57d4b711ede74e00180ed756e3
data/README.md ADDED
@@ -0,0 +1,32 @@
1
+ # Large CSV Reader GEM
2
+
3
+ This **gem** was created to help with a known issue: processing large **csv files** using **ruby** without exhausting **RAM**. It uses lazy enumeration, which allows the methods to act on one line at a time instead of loading millions of lines into memory at once.
4
+
5
+ I consider this **gem** to be in a **Beta state**: a proper testing suite is still missing, and there are some improvements or extensions that would be useful to have in a full version.
6
+
7
+
8
+ # Installation
9
+ `gem install large_csv_reader`
10
+ - In your files:
11
+ `require 'large_csv_reader'`
12
+
13
+ # Usage
14
+
15
+ ## General Methods
16
+ |Function|Description |
17
+ |--|--|
18
+ |`reader = LargeCsvReader.new`| creates a new instance of the reader|
19
+ |`reader.generate_csv(fileName, columnNames)`|creates a new csv file with the name and header names passed as parameters|
20
+ |`reader.append_to_csv(filename, rows=1000000, rowStructure)`|Adds lines to the csv; these lines are generated from the rowStructure array parameter. If the rows parameter is not present, it will append 1 million lines to the file by default.|
21
+ |`reader.massive_read_in_csv_data(file_name)`|lazily loads each csv row into a list|
22
+ |`reader.massive_csv_builder(filename, column_names, rowMult="1")`|creates a csv with millions of lines; the value of rowMult represents how many millions of lines the file will have|
23
+ |`reader.row_generator(structure)`|generate rows on demand using enumeration|
24
+
25
+ ## Specific Methods
26
+ The **rest** of the methods are specific helpers for solving a **test problem** with **book data** columns "**Date**", "**ISBN**", "**Price**":
27
+
28
+ - `massive_total_value_in_stock(csv_file_name)`
29
+ - `massive_number_of_each_isbn(csv_file_name)`
30
+ - `append_book_to_csv(filename,rows=1000000)`
31
+ - `book_generator`
32
+
@@ -0,0 +1,12 @@
1
# Gem specification for large_csv_reader.
# Fix: corrected the "proccesor" -> "processor" typo in summary/description.
Gem::Specification.new do |s|
  s.name        = 'large_csv_reader'
  s.version     = '1.0.1'
  s.summary     = "Large CSV builder and processor"
  s.description = "Large CSV builder and processor, it can help with creation of large csv files, reading those files and making operations over the csv data"
  s.authors     = ["Arturo Castro"]
  s.email       = 'agcastro1994@gmail.com'
  # Files shipped with the gem; paths are relative to the gem root.
  s.files       = ["lib/large_csv_reader.rb", "lib/large_csv_reader/book_in_stock.rb", "large_csv_reader.gemspec", "README.md"]
  # NOTE(review): empty license string — consider declaring one (e.g. 'MIT').
  s.license     = ''
  s.add_dependency 'csv', '~> 1.0'
  s.add_dependency 'date', '~> 1.0'
end
@@ -0,0 +1,8 @@
1
# Simple value object for one book in stock.
# The ISBN is fixed at construction time; the price may be updated later.
class BookInStock
  attr_reader :isbn
  attr_accessor :price

  # isbn  - identifier for the book (read-only after creation).
  # price - any value Float() accepts (String or Numeric); stored as a Float.
  #         Float() raises ArgumentError/TypeError on unparsable input.
  def initialize(isbn, price)
    @isbn  = isbn
    @price = Float(price)
  end
end
@@ -0,0 +1,137 @@
1
+ require 'csv'
2
+ require 'date'
3
+ require 'large_csv_reader/book_in_stock'
4
+
5
# Builds and processes very large CSV files without loading them fully into
# memory: rows are written from infinite lazy generators and read back with
# CSV.foreach, which streams one row at a time.
#
# NOTE(review): legacy commented-out implementations removed; they remain in
# the package's version history if needed.
class LargeCsvReader
  # Books collected by #massive_read_in_csv_data.
  attr_accessor :books_in_stock

  def initialize
    @books_in_stock = []
  end

  # Creates +filename+ with +column_names+ as the header row, then appends
  # rowMultiplicator million copies of +rowStructure+.
  # rowMultiplicator - String/Integer count of millions of rows ("1" => 1M).
  def massive_csv_builder(filename, column_names, rowStructure, rowMultiplicator = "1")
    generate_csv(filename, column_names)
    # Two-arg call binds (filename, rowStructure); rows keeps its 1M default.
    rowMultiplicator.to_i.times { append_to_csv(filename, rowStructure) }
  end

  # Like #massive_csv_builder, but rows are generated book records
  # (Date, ISBN, Price).
  def massive_book_csv_builder(filename, column_names, rowMultiplicator = "1")
    generate_csv(filename, column_names)
    rowMultiplicator.to_i.times { append_book_to_csv(filename) }
  end

  # Writes the header row.
  # NOTE(review): opens in append mode, so an existing file keeps its old
  # contents and gains a second header — confirm before switching to "wb".
  def generate_csv(filename, column_names)
    CSV.open(filename, "ab") { |csv| csv << column_names }
  end

  # Appends +rows+ book lines (fixed sample date/ISBN/price) to +filename+.
  # Fix: uses lazy take+each instead of abusing select/first, so the
  # infinite generator is consumed one row at a time and never materialized.
  def append_book_to_csv(filename, rows = 1000000)
    CSV.open(filename, "ab") do |csv|
      book_generator.lazy.take(rows).each do |book|
        csv << [Date.new(2001, 2, 3), book.isbn, book.price]
      end
    end
  end

  # Appends +rows+ copies of +rowStructure+ (an Array of cell values).
  # Signature kept for compatibility: the optional +rows+ precedes the
  # required +rowStructure+, so two-argument calls bind
  # (filename, rowStructure) and rows defaults to one million.
  def append_to_csv(filename, rows = 1000000, rowStructure)
    CSV.open(filename, "ab") do |csv|
      row_generator(rowStructure).lazy.take(rows).each { |row| csv << row }
    end
  end

  # Streams the CSV one row at a time, collecting a BookInStock per data row
  # (columns: 0 = date, 1 = ISBN, 2 = price) into @books_in_stock.
  # Fix: drops the needless each_slice(1) wrapper; row[1]/row[2] are the
  # same cells the original reached via row[0][1]/row[0][2].
  def massive_read_in_csv_data(csv_file_name)
    CSV.foreach(csv_file_name, headers: true) do |row|
      @books_in_stock << BookInStock.new(row[1], row[2])
    end
    @books_in_stock
  end

  # Sums the price column (index 2) without loading the file into memory.
  # Fix: float seed so an empty file also yields a Float (0.0).
  def massive_total_value_in_stock(csv_file_name)
    CSV.foreach(csv_file_name, headers: true).inject(0.0) do |sum, row|
      sum + row[2].to_f
    end
  end

  # Returns a Hash of ISBN => occurrence count, streaming the file.
  def massive_number_of_each_isbn(csv_file_name)
    counter_hash = {}
    CSV.foreach(csv_file_name, headers: true) do |row|
      # Helper keeps its original one-row-slice calling convention ([row]).
      number_of_each_isbn(counter_hash, [row])
    end
    counter_hash
  end

  # Increments the count for the ISBN found at bookInfo[0][1].
  # bookInfo - a one-element slice: [CSV::Row]. Returns counter_hash.
  def number_of_each_isbn(counter_hash, bookInfo)
    isbn = bookInfo[0][1]
    counter_hash[isbn] = (counter_hash[isbn] || 0) + 1
    counter_hash
  end

  # Infinite enumerator yielding the same sample book; consume lazily.
  # Fix: block parameter renamed from `caller` (shadowed Kernel#caller).
  def book_generator
    Enumerator.new do |yielder|
      sample = BookInStock.new("978-1-9343561-0-4", 20.05)
      loop { yielder.yield sample }
    end
  end

  # Infinite enumerator yielding +structure+ forever; consume lazily.
  def row_generator(structure)
    Enumerator.new do |yielder|
      loop { yielder.yield structure }
    end
  end
end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: large_csv_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Arturo Castro
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-05-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: date
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.0'
41
+ description: Large CSV builder and processor, it can help with creation of large csv
42
+ files, reading those files and making operations over the csv data
43
+ email: agcastro1994@gmail.com
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - README.md
49
+ - large_csv_reader.gemspec
50
+ - lib/large_csv_reader.rb
51
+ - lib/large_csv_reader/book_in_stock.rb
52
+ homepage:
53
+ licenses:
54
+ - ''
55
+ metadata: {}
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: '0'
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubygems_version: 3.3.14
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: Large CSV builder and processor
75
+ test_files: []