large_csv_reader 1.0.1
- checksums.yaml +7 -0
- data/README.md +32 -0
- data/large_csv_reader.gemspec +12 -0
- data/lib/large_csv_reader/book_in_stock.rb +8 -0
- data/lib/large_csv_reader.rb +137 -0
- metadata +75 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: 12f98dccfdfdc9e1c4b9cfee61c45fabc328338707d3ea39bda7ad98935d9daf
  data.tar.gz: 40ca5ad5f66d00b6ab02c7236c03cdb5d53889c32687415cf3e321e74a2a6144
SHA512:
  metadata.gz: d587014c2ed754c4c82b557066bef98b41411a996c8e365112dab428559749eb2080ae15f5cb405738c915d38e08bf3b362821a33d7cc0847748b4a23742b126
  data.tar.gz: 77c821fde2268965a976f88c4ecb99e55b7466f4626120cf65afddab81585d5654cb7b4e97fd1973c2008fe9c57f30353b24ee57d4b711ede74e00180ed756e3
data/README.md
ADDED
@@ -0,0 +1,32 @@
# Large CSV Reader GEM

This **gem** was created to help with a well-known problem: processing large **CSV files** in **Ruby** without exhausting **RAM**. It uses lazy enumeration, which lets each method act on one line at a time instead of loading millions of lines into memory at once.

I consider this **gem** to be in a **beta state**: a proper test suite is still missing, and there are improvements and extensions that would be useful in a full version.
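As a tiny illustration of the underlying idea (plain Ruby, not this gem's API): chaining `lazy` onto an infinite enumerator means each element is produced only when it is consumed, so a bound such as `first(3)` ever materializes just three values:

```ruby
# Infinite stream of integers; nothing is generated until it is consumed.
numbers = Enumerator.new do |yielder|
  n = 0
  loop { yielder << (n += 1) }
end

numbers.lazy.map { |n| n * 2 }.first(3) # => [2, 4, 6]; only three elements are ever produced
```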
# Installation

`gem install large_csv_reader`

- In your files:

`require 'large_csv_reader'`

# Usage

## General Methods

|Function|Description|
|--|--|
|`reader = LargeCsvReader.new`|Creates a new instance of the reader.|
|`reader.generate_csv(filename, column_names)`|Creates a new CSV file with the file name and header names passed as parameters.|
|`reader.append_to_csv(filename, rows=1000000, rowStructure)`|Appends lines to the CSV; the lines are generated from the `rowStructure` array parameter. If the `rows` parameter is not given, 1,000,000 lines are appended by default.|
|`reader.massive_read_in_csv_data(file_name)`|Lazily loads each CSV row into a list.|
|`reader.massive_csv_builder(filename, column_names, rowStructure, rowMultiplicator="1")`|Creates a CSV with millions of lines; the value of `rowMultiplicator` sets how many millions of lines the file will have.|
|`reader.row_generator(structure)`|Generates rows on demand using an enumerator.|

## Specific Methods

The **rest** of the methods address a specific **test problem** involving **book data** with the columns "**Date**", "**ISBN**", "**Price**" (a usage sketch follows this file):

- `massive_total_value_in_stock(csv_file_name)`
- `massive_number_of_each_isbn(csv_file_name)`
- `append_book_to_csv(filename, rows=1000000)`
- `book_generator`
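To make the workflow above concrete, here is a short usage sketch built from the methods this README lists (the file name `books.csv` and the sample row values are illustrative only):

```ruby
require 'large_csv_reader'

reader = LargeCsvReader.new

# Build a CSV with 2 million identical rows: the header is written first,
# then the row structure is repeated via the lazy generator.
reader.massive_csv_builder("books.csv",
                           ["Date", "ISBN", "Price"],
                           ["2001-02-03", "978-1-9343561-0-4", "20.05"],
                           "2")

# Aggregate over the file one row at a time, never holding it all in RAM.
total  = reader.massive_total_value_in_stock("books.csv")
counts = reader.massive_number_of_each_isbn("books.csv")
puts "Total value in stock: #{total}"
puts "Rows per ISBN: #{counts}"
```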
data/large_csv_reader.gemspec
ADDED
@@ -0,0 +1,12 @@

Gem::Specification.new do |s|
  s.name        = 'large_csv_reader'
  s.version     = '1.0.1'
  s.summary     = "Large CSV builder and processor"
  s.description = "Large CSV builder and processor; it can help with creating large CSV files, reading those files, and performing operations on the CSV data"
  s.authors     = ["Arturo Castro"]
  s.email       = 'agcastro1994@gmail.com'
  s.files       = ["lib/large_csv_reader.rb", "lib/large_csv_reader/book_in_stock.rb", "large_csv_reader.gemspec", "README.md"]
  s.license     = ''
  s.add_dependency 'csv', '~> 1.0'
  s.add_dependency 'date', '~> 1.0'
end
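For Bundler-managed projects, the same dependency can be declared in the Gemfile instead of a manual `gem install`; a minimal sketch, assuming the gem is fetched from rubygems.org:

```ruby
# Gemfile
source "https://rubygems.org"

gem "large_csv_reader", "~> 1.0"
```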
data/lib/large_csv_reader.rb
ADDED
@@ -0,0 +1,137 @@

require 'csv'
require 'date'
require 'large_csv_reader/book_in_stock'

class LargeCsvReader
  attr_accessor :books_in_stock

  def initialize
    @books_in_stock = []
  end

  # Writes the header, then appends 1,000,000 copies of rowStructure per
  # iteration, rowMultiplicator times.
  def massive_csv_builder(filename, column_names, rowStructure, rowMultiplicator = "1")
    generate_csv(filename, column_names)
    millions = rowMultiplicator.to_i
    millions.times do
      append_to_csv(filename, rowStructure)
    end
  end

  # Same as massive_csv_builder, but rows follow the fixed book layout.
  def massive_book_csv_builder(filename, column_names, rowMultiplicator = "1")
    generate_csv(filename, column_names)
    millions = rowMultiplicator.to_i
    millions.times do
      append_book_to_csv(filename)
    end
  end

  # Writes the header row. Note that "ab" appends, so calling this on an
  # existing file adds a second header rather than truncating the file.
  def generate_csv(filename, column_names)
    CSV.open(filename, "ab") do |csv|
      csv << column_names
    end
  end

  # Appends `rows` book rows. The generator is infinite; `lazy` plus
  # `first(rows)` bounds the enumeration so only one row is processed at a
  # time (`select` is used purely for its side effect of writing).
  def append_book_to_csv(filename, rows = 1_000_000)
    CSV.open(filename, "ab") do |csv|
      book_generator.lazy.select { |book| csv << [Date.new(2001, 2, 3), book.isbn, book.price] }.first(rows)
    end
  end

  # Appends `rows` copies of rowStructure. Ruby allows the optional `rows`
  # parameter before the required trailing one, so it can be omitted.
  def append_to_csv(filename, rows = 1_000_000, rowStructure)
    CSV.open(filename, "ab") do |csv|
      row_generator(rowStructure).lazy.select { |row| csv << row }.first(rows)
    end
  end

  # Streams the file row by row and collects BookInStock objects
  # (column 1 = ISBN, column 2 = Price).
  def massive_read_in_csv_data(csv_file_name)
    CSV.foreach(csv_file_name, headers: true).each_slice(1).each { |row| @books_in_stock << BookInStock.new(row[0][1], row[0][2]) }
  end

  # Sums the Price column without loading the whole file into memory.
  def massive_total_value_in_stock(csv_file_name)
    CSV.foreach(csv_file_name, headers: true).each_slice(1).inject(0) { |sum, row| sum + row[0][2].to_f }
  end

  # Counts how many rows carry each ISBN, one row at a time.
  def massive_number_of_each_isbn(csv_file_name)
    counter_hash = {}
    CSV.foreach(csv_file_name, headers: true).each_slice(1).each { |row| number_of_each_isbn(counter_hash, row) }
    counter_hash
  end

  def number_of_each_isbn(counter_hash, bookInfo)
    isbn = bookInfo[0][1]
    if counter_hash.has_key?(isbn)
      counter_hash[isbn] += 1
    else
      counter_hash[isbn] = 1
    end
    counter_hash
  end

  # Infinite enumerator yielding the same sample book forever; consumers
  # must bound it (e.g. with lazy + first).
  def book_generator
    Enumerator.new do |caller|
      testBook = BookInStock.new("978-1-9343561-0-4", 20.05)
      loop do
        caller.yield testBook
      end
    end
  end

  # Infinite enumerator yielding the given row structure forever.
  def row_generator(structure)
    Enumerator.new do |caller|
      row = structure
      loop do
        caller.yield row
      end
    end
  end

  # Commented-out legacy implementations kept from earlier iterations:

  # def add_new_book(book, filename)
  #   CSV.open(filename, "ab") do |csv|
  #     csv << [Date.new(2001, 2, 3), book.isbn, book.price]
  #   end
  # end

  # def total_value_in_stock
  #   sum = 0.0
  #   @books_in_stock.each do |book|
  #     sum += book.price
  #   end
  #   sum
  # end

  # def read_in_csv_data(csv_file_name)
  #   CSV.foreach(csv_file_name, headers: true) do |row|
  #     @books_in_stock << BookInStock.new(
  #       row["ISBN"],
  #       row["Price"]
  #     )
  #   end
  # end

  # def generate_csv(filename, column_names, rows)
  #   records = self.book_generator.first(rows)
  #   CSV.open(filename, "ab") do |csv|
  #     csv << column_names
  #     10.times do
  #       records.each do |book|
  #         csv << [Date.new(2001, 2, 3), book.isbn, book.price]
  #       end
  #     end
  #   end
  # end

  # def number_of_each_isbn
  #   counter_hash = {}
  #   @books_in_stock.each do |book|
  #     isbn = book.isbn
  #     if counter_hash.has_key?(isbn)
  #       counter_hash[isbn] = counter_hash[isbn] + 1
  #     else
  #       counter_hash[isbn] = 1
  #     end
  #   end
  #   counter_hash
  # end
end
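The file list above includes `data/lib/large_csv_reader/book_in_stock.rb +8`, but its contents are not shown in this diff. A minimal sketch consistent with how the class is used in `large_csv_reader.rb` (constructed with an ISBN and a price, read via `isbn` and `price`, with the price summed as a number) might look like this; the actual 8-line file may differ:

```ruby
# Hypothetical reconstruction of lib/large_csv_reader/book_in_stock.rb;
# only the constructor arity and the isbn/price readers are implied by
# the calling code.
class BookInStock
  attr_reader :isbn, :price

  def initialize(isbn, price)
    @isbn  = isbn
    @price = Float(price) # accepts a numeric or a string like "20.05"
  end
end
```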
metadata
ADDED
@@ -0,0 +1,75 @@
--- !ruby/object:Gem::Specification
name: large_csv_reader
version: !ruby/object:Gem::Version
  version: 1.0.1
platform: ruby
authors:
- Arturo Castro
autorequire:
bindir: bin
cert_chain: []
date: 2022-05-26 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: csv
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
- !ruby/object:Gem::Dependency
  name: date
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - "~>"
      - !ruby/object:Gem::Version
        version: '1.0'
description: Large CSV builder and processor; it can help with creating large CSV
  files, reading those files, and performing operations on the CSV data
email: agcastro1994@gmail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- README.md
- large_csv_reader.gemspec
- lib/large_csv_reader.rb
- lib/large_csv_reader/book_in_stock.rb
homepage:
licenses:
- ''
metadata: {}
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.3.14
signing_key:
specification_version: 4
summary: Large CSV builder and processor
test_files: []