large_csv_reader 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +32 -0
- data/large_csv_reader.gemspec +12 -0
- data/lib/large_csv_reader/book_in_stock.rb +8 -0
- data/lib/large_csv_reader.rb +137 -0
- metadata +75 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: 12f98dccfdfdc9e1c4b9cfee61c45fabc328338707d3ea39bda7ad98935d9daf
  data.tar.gz: 40ca5ad5f66d00b6ab02c7236c03cdb5d53889c32687415cf3e321e74a2a6144
SHA512:
  metadata.gz: d587014c2ed754c4c82b557066bef98b41411a996c8e365112dab428559749eb2080ae15f5cb405738c915d38e08bf3b362821a33d7cc0847748b4a23742b126
  data.tar.gz: 77c821fde2268965a976f88c4ecb99e55b7466f4626120cf65afddab81585d5654cb7b4e97fd1973c2008fe9c57f30353b24ee57d4b711ede74e00180ed756e3
data/README.md
ADDED
@@ -0,0 +1,32 @@
# Large CSV Reader GEM

This **gem** was created to help with a well-known problem: processing large **CSV files** in **Ruby** without exhausting **RAM**. It relies on lazy enumeration, which lets the methods act on one line at a time instead of loading millions of lines into memory at once.
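For intuition, here is a minimal, self-contained sketch of that idea using only Ruby's standard `CSV` class (the file name and the `Price` column are illustrative, not part of the gem):

```ruby
require 'csv'

# Stream a potentially huge CSV one row at a time; only the running
# total is ever held in memory, never the whole file.
total = CSV.foreach('huge_books.csv', headers: true)
           .lazy
           .map { |row| row['Price'].to_f }
           .reduce(0.0, :+)

puts total
```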

I consider this **gem** to be in a **beta state**: a proper test suite is still missing, and there are improvements and extensions that would be useful to have in a full version.


# Installation

`gem install large_csv_reader`

- In your files:

`require 'large_csv_reader'`

# Usage

## General Methods

The table below lists the general-purpose methods; a combined example follows it.

|Function|Description|
|--|--|
|`reader = LargeCsvReader.new`|Creates a new instance of the reader.|
|`reader.generate_csv(fileName, columnNames)`|Creates a new CSV file with the file name and header names passed as parameters.|
|`reader.append_to_csv(filename, rows=1000000, rowStructure)`|Appends lines to the CSV; each line is generated from the `rowStructure` array parameter. If the `rows` parameter is not given, 1,000,000 lines are appended by default.|
|`reader.massive_read_in_csv_data(file_name)`|Lazily loads each CSV row into a list.|
|`reader.massive_csv_builder(filename, column_names, rowStructure, rowMult="1")`|Creates a CSV with millions of lines; `rowMult` sets how many blocks of one million lines the file will have.|
|`reader.row_generator(structure)`|Generates rows on demand using an `Enumerator`.|
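Putting the general methods together (the file names, headers, and row structure below are just examples; the calls follow the signatures in `lib/large_csv_reader.rb` further down):

```ruby
require 'large_csv_reader'

reader = LargeCsvReader.new

# Write the header row, then append rows generated lazily from the structure.
reader.generate_csv('inventory.csv', %w[Date ISBN Price])

# Explicit row count via the middle `rows` argument; omitting it appends 1,000,000 rows.
reader.append_to_csv('inventory.csv', 10, ['2001-02-03', '978-1-9343561-0-4', '20.05'])

# Build a 2,000,000-row file in one call: header plus two "million blocks".
reader.massive_csv_builder('big_inventory.csv', %w[Date ISBN Price],
                           ['2001-02-03', '978-1-9343561-0-4', '20.05'], '2')
```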

## Specific Methods

The **remaining** methods address a specific **test problem** with **book data** in the columns "**Date**", "**ISBN**" and "**Price**" (a short usage sketch follows the list):

- `massive_total_value_in_stock(csv_file_name)`
- `massive_number_of_each_isbn(csv_file_name)`
- `append_book_to_csv(filename, rows=1000000)`
- `book_generator`
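For instance, building a small book CSV and then summarising it might look like this (the file name is illustrative; `massive_book_csv_builder` comes from the library source below and writes one million generated book rows per multiplier):

```ruby
require 'large_csv_reader'

reader = LargeCsvReader.new

# Header row plus one "million block" of generated book rows (Date, ISBN, Price).
reader.massive_book_csv_builder('books.csv', %w[Date ISBN Price], '1')

# Aggregate the file row by row, without loading it whole.
total  = reader.massive_total_value_in_stock('books.csv')  # sum of the Price column
counts = reader.massive_number_of_each_isbn('books.csv')   # { isbn => occurrences }
```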

data/large_csv_reader.gemspec
ADDED
@@ -0,0 +1,12 @@
Gem::Specification.new do |s|
  s.name = 'large_csv_reader'
  s.version = '1.0.1'
  s.summary = "Large CSV builder and processor"
  s.description = "Large CSV builder and processor; it can help with creating large CSV files, reading those files, and performing operations over the CSV data"
  s.authors = ["Arturo Castro"]
  s.email = 'agcastro1994@gmail.com'
  s.files = ["lib/large_csv_reader.rb", "lib/large_csv_reader/book_in_stock.rb", "large_csv_reader.gemspec", "README.md"]
  s.license = ''
  s.add_dependency 'csv', '~> 1.0'
  s.add_dependency 'date', '~> 1.0'
end
data/lib/large_csv_reader.rb
ADDED
@@ -0,0 +1,137 @@
require 'csv'
require 'date'
require 'large_csv_reader/book_in_stock'

class LargeCsvReader
  attr_accessor :books_in_stock

  def initialize
    @books_in_stock = []
  end

  # Creates the CSV with a header row, then appends `rowMultiplicator`
  # blocks of one million rows generated from `rowStructure`.
  def massive_csv_builder(filename, column_names, rowStructure, rowMultiplicator = "1")
    generate_csv(filename, column_names)
    millions = rowMultiplicator.to_i
    millions.times do
      append_to_csv(filename, rowStructure)
    end
  end

  # Book-specific variant: each block of one million rows comes from book_generator.
  def massive_book_csv_builder(filename, column_names, rowMultiplicator = "1")
    generate_csv(filename, column_names)
    millions = rowMultiplicator.to_i
    millions.times do
      append_book_to_csv(filename)
    end
  end

  # Writes only the header row.
  def generate_csv(filename, column_names)
    CSV.open(filename, "ab") do |csv|
      csv << column_names
    end
  end

  # Lazily pulls `rows` books from the infinite book_generator and writes each
  # one as it is produced; `select` is used only for its side effect, and
  # `first(rows)` stops the enumeration.
  def append_book_to_csv(filename, rows = 1000000)
    CSV.open(filename, "ab") do |csv|
      self.book_generator.lazy.select { |book| csv << [Date.new(2001, 2, 3), book.isbn, book.price] }.first(rows)
    end
  end

  # Same lazy trick for arbitrary row structures.
  def append_to_csv(filename, rows = 1000000, rowStructure)
    CSV.open(filename, "ab") do |csv|
      self.row_generator(rowStructure).lazy.select { |row| csv << row }.first(rows)
    end
  end

  # Streams the file one row at a time and collects BookInStock objects
  # (columns are accessed by position: 1 = ISBN, 2 = Price).
  def massive_read_in_csv_data(csv_file_name)
    CSV.foreach(csv_file_name, headers: true).each_slice(1).each { |row| @books_in_stock << BookInStock.new(row[0][1], row[0][2]) }
  end

  # Sums the Price column without ever loading the whole file.
  def massive_total_value_in_stock(csv_file_name)
    CSV.foreach(csv_file_name, headers: true).each_slice(1).each.inject(0) { |sum, row| sum + row[0][2].to_f }
  end

  # Counts occurrences of each ISBN, one row at a time.
  def massive_number_of_each_isbn(csv_file_name)
    counter_hash = {}
    CSV.foreach(csv_file_name, headers: true).each_slice(1).each { |row| number_of_each_isbn(counter_hash, row) }
    return counter_hash
  end

  def number_of_each_isbn(counter_hash, bookInfo)
    isbn = bookInfo[0][1]
    if counter_hash.has_key?(isbn)
      counter_hash[isbn] = counter_hash[isbn] + 1
    else
      counter_hash[isbn] = 1
    end
    counter_hash
  end

  # Infinite enumerator that yields the same sample book forever;
  # callers are expected to limit it with `lazy` + `first`.
  def book_generator
    Enumerator.new do |caller|
      testBook = BookInStock.new("978-1-9343561-0-4", 20.05)
      loop do
        caller.yield testBook
      end
    end
  end

  # Infinite enumerator that yields the given row structure forever.
  def row_generator(structure)
    Enumerator.new do |caller|
      row = structure
      loop do
        caller.yield row
      end
    end
  end

  # def add_new_book(book, filename)
  #   CSV.open(filename, "ab") do |csv|
  #     csv << [Date.new(2001,2,3), book.isbn, book.price]
  #   end
  # end

  # def total_value_in_stock
  #   sum = 0.0
  #   @books_in_stock.each do |book|
  #     sum += book.price
  #   end
  #   sum
  # end

  # def read_in_csv_data(csv_file_name)
  #   CSV.foreach(csv_file_name, headers: true) do |row|
  #     @books_in_stock << BookInStock.new(
  #       row["ISBN"],
  #       row["Price"]
  #     )
  #   end
  # end

  # def generate_csv(filename,column_names, rows)
  #   records = self.book_generator.first(rows)
  #
  #   CSV.open(filename, "ab") do |csv|
  #     csv << column_names
  #     10.times do
  #       records.each do |book|
  #         csv << [Date.new(2001,2,3), book.isbn, book.price]
  #       end
  #     end
  #   end
  # end

  # def number_of_each_isbn
  #   counter_hash = {}
  #   @books_in_stock.each do |book|
  #     isbn = book.isbn
  #     if counter_hash.has_key?(isbn)
  #       counter_hash[isbn] = counter_hash[isbn]+1
  #     else
  #       counter_hash[isbn] = 1
  #     end
  #   end
  #   counter_hash
  # end
end
metadata
ADDED
@@ -0,0 +1,75 @@
--- !ruby/object:Gem::Specification
name: large_csv_reader
version: !ruby/object:Gem::Version
  version: 1.0.1
platform: ruby
authors:
- Arturo Castro
autorequire:
bindir: bin
cert_chain: []
date: 2022-05-26 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: csv
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
- !ruby/object:Gem::Dependency
  name: date
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
description: Large CSV builder and processor; it can help with creating large CSV
  files, reading those files, and performing operations over the CSV data
email: agcastro1994@gmail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- README.md
- large_csv_reader.gemspec
- lib/large_csv_reader.rb
- lib/large_csv_reader/book_in_stock.rb
homepage:
licenses:
- ''
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.3.14
signing_key:
specification_version: 4
summary: Large CSV builder and processor
test_files: []