kaggle 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -19
- data/Rakefile +1 -1
- data/kaggle.gemspec +4 -4
- data/lib/kaggle/client.rb +82 -76
- data/lib/kaggle/constants.rb +5 -5
- data/lib/kaggle/version.rb +2 -2
- data/lib/kaggle.rb +2 -1
- metadata +40 -41
- data/plans/lists.md +0 -77
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56c5fd9c27bd8cdc20423167b912f171ee07d37e0413cfbbbcba2ea38140b7c6
|
4
|
+
data.tar.gz: 3f37e295dc32362f7606803790225af6a707021ba78b239d4f2a29567102fee6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41b60fa4d87d5a78778247acdbf620bbbcb2a23add74b07c76ff25fa2a02ed5675e8fd3aa27dce6352c2c49c0dfcfb90f54d55cbd1219cb81b04c6a2b371d086
|
7
|
+
data.tar.gz: c3b887147a1e0aeb38fdf1440b63f3c722e8375feaab45bbc0a5e4e434b0a8f7cb06ea71437fe08d888704c732d60629c7e3f18443c40e339abb72c88a56e667
|
data/README.md
CHANGED
@@ -78,19 +78,6 @@ client = Kaggle::Client.new(
|
|
78
78
|
)
|
79
79
|
```
|
80
80
|
|
81
|
-
### List Datasets
|
82
|
-
|
83
|
-
```ruby
|
84
|
-
# List all datasets
|
85
|
-
datasets = client.list_datasets
|
86
|
-
|
87
|
-
# Search datasets
|
88
|
-
datasets = client.list_datasets(search: 'housing')
|
89
|
-
|
90
|
-
# Paginate results
|
91
|
-
datasets = client.list_datasets(page: 2, page_size: 10)
|
92
|
-
```
|
93
|
-
|
94
81
|
### Download Datasets
|
95
82
|
|
96
83
|
```ruby
|
@@ -131,12 +118,6 @@ data = client.parse_csv_to_json('/path/to/file.csv')
|
|
131
118
|
The gem includes a command-line interface:
|
132
119
|
|
133
120
|
```bash
|
134
|
-
# List datasets
|
135
|
-
kaggle list
|
136
|
-
|
137
|
-
# Search datasets
|
138
|
-
kaggle list "housing"
|
139
|
-
|
140
121
|
# Download dataset
|
141
122
|
kaggle download zillow zecon
|
142
123
|
|
data/Rakefile
CHANGED
data/kaggle.gemspec
CHANGED
@@ -28,19 +28,19 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
29
29
|
spec.require_paths = ['lib']
|
30
30
|
|
31
|
-
spec.add_dependency 'httparty', '>= 0.23'
|
32
31
|
spec.add_dependency 'csv', '>= 3.3'
|
33
|
-
spec.add_dependency 'oj', '3.16.11'
|
34
32
|
spec.add_dependency 'fileutils', '>= 1.7'
|
33
|
+
spec.add_dependency 'httparty', '>= 0.23'
|
34
|
+
spec.add_dependency 'oj', '3.16.11'
|
35
35
|
spec.add_dependency 'rubyzip', '>= 2.0'
|
36
36
|
|
37
|
-
spec.add_development_dependency 'rake', '~> 13.3.0'
|
38
37
|
spec.add_development_dependency 'minitest', '~> 5.25.5'
|
39
38
|
spec.add_development_dependency 'minitest-focus', '~> 1.4.0'
|
40
39
|
spec.add_development_dependency 'minitest-reporters', '~> 1.7.1'
|
41
|
-
spec.add_development_dependency 'webmock', '~> 3.24.0'
|
42
40
|
spec.add_development_dependency 'mocha', '~> 2.4.5'
|
43
41
|
spec.add_development_dependency 'pry', '~> 0.15.2'
|
42
|
+
spec.add_development_dependency 'rake', '~> 13.3.0'
|
44
43
|
spec.add_development_dependency 'simplecov', '~> 0.22.0'
|
45
44
|
spec.add_development_dependency 'timecop', '~> 0.9.10'
|
45
|
+
spec.add_development_dependency 'webmock', '~> 3.24.0'
|
46
46
|
end
|
data/lib/kaggle/client.rb
CHANGED
@@ -1,102 +1,109 @@
|
|
1
1
|
module Kaggle
|
2
2
|
class Client
|
3
3
|
include HTTParty
|
4
|
-
|
4
|
+
|
5
5
|
base_uri Constants::BASE_URL
|
6
|
-
|
7
|
-
attr_reader :username, :api_key, :download_path, :cache_path, :timeout
|
8
|
-
|
9
|
-
def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil,
|
6
|
+
|
7
|
+
attr_reader :username, :api_key, :download_path, :cache_path, :timeout, :cache_only
|
8
|
+
|
9
|
+
def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil,
|
10
|
+
timeout: nil, cache_only: false)
|
10
11
|
load_credentials(username, api_key, credentials_file)
|
11
12
|
@download_path = download_path || Constants::DEFAULT_DOWNLOAD_PATH
|
12
13
|
@cache_path = cache_path || Constants::DEFAULT_CACHE_PATH
|
13
14
|
@timeout = timeout || Constants::DEFAULT_TIMEOUT
|
14
|
-
|
15
|
-
|
16
|
-
|
15
|
+
@cache_only = cache_only
|
16
|
+
|
17
|
+
unless cache_only || (valid_credential?(@username) && valid_credential?(@api_key))
|
18
|
+
raise AuthenticationError,
|
19
|
+
'Username and API key are required (or set cache_only: true for cache-only access)'
|
20
|
+
end
|
21
|
+
|
17
22
|
ensure_directories_exist
|
18
|
-
setup_httparty_options
|
23
|
+
setup_httparty_options unless cache_only
|
19
24
|
end
|
20
|
-
|
25
|
+
|
21
26
|
def download_dataset(dataset_owner, dataset_name, options = {})
|
22
27
|
dataset_path = "#{dataset_owner}/#{dataset_name}"
|
23
|
-
|
28
|
+
|
24
29
|
# Check cache first for parsed data
|
25
30
|
if options[:use_cache] && options[:parse_csv]
|
26
31
|
cache_key = generate_cache_key(dataset_path)
|
27
|
-
if cached_file_exists?(cache_key)
|
28
|
-
return load_from_cache(cache_key)
|
29
|
-
end
|
32
|
+
return load_from_cache(cache_key) if cached_file_exists?(cache_key)
|
30
33
|
end
|
31
|
-
|
34
|
+
|
32
35
|
# Check if we already have extracted files for this dataset
|
33
36
|
extracted_dir = get_extracted_dir(dataset_path)
|
34
37
|
if options[:use_cache] && Dir.exist?(extracted_dir) && !Dir.empty?(extracted_dir)
|
35
38
|
return handle_existing_dataset(extracted_dir, options)
|
36
39
|
end
|
37
|
-
|
40
|
+
|
41
|
+
# If cache_only mode and no cached data found, return nil or raise based on force_cache option
|
42
|
+
if @cache_only
|
43
|
+
if options[:force_cache]
|
44
|
+
raise CacheNotFoundError, "Dataset '#{dataset_path}' not found in cache and force_cache is enabled"
|
45
|
+
else
|
46
|
+
return nil # Gracefully return nil when cache_only but not forced
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
38
50
|
# Download the zip file
|
39
51
|
response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:download]}/#{dataset_path}")
|
40
|
-
|
41
|
-
unless response.success?
|
42
|
-
|
43
|
-
end
|
44
|
-
|
52
|
+
|
53
|
+
raise DownloadError, "Failed to download dataset: #{response.message}" unless response.success?
|
54
|
+
|
45
55
|
# Save zip file
|
46
56
|
zip_file = save_zip_file(dataset_path, response.body)
|
47
|
-
|
57
|
+
|
48
58
|
# Extract zip file
|
49
59
|
extract_zip_file(zip_file, extracted_dir)
|
50
|
-
|
60
|
+
|
51
61
|
# Clean up zip file
|
52
62
|
File.delete(zip_file) if File.exist?(zip_file)
|
53
|
-
|
63
|
+
|
54
64
|
# Handle the extracted files
|
55
65
|
result = handle_extracted_dataset(extracted_dir, options)
|
56
|
-
|
66
|
+
|
57
67
|
# Cache parsed CSV data if requested
|
58
68
|
if options[:use_cache] && options[:parse_csv] && (result.is_a?(Hash) || result.is_a?(Array))
|
59
69
|
cache_key = generate_cache_key(dataset_path)
|
60
70
|
cache_parsed_data(cache_key, result)
|
61
71
|
end
|
62
|
-
|
72
|
+
|
63
73
|
result
|
64
74
|
end
|
65
|
-
|
66
|
-
|
75
|
+
|
67
76
|
def dataset_files(dataset_owner, dataset_name)
|
68
77
|
dataset_path = "#{dataset_owner}/#{dataset_name}"
|
69
78
|
response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:files]}/#{dataset_path}")
|
70
|
-
|
71
|
-
unless response.success?
|
72
|
-
|
73
|
-
end
|
74
|
-
|
79
|
+
|
80
|
+
raise DatasetNotFoundError, "Dataset not found or accessible: #{dataset_path}" unless response.success?
|
81
|
+
|
75
82
|
Oj.load(response.body)
|
76
83
|
rescue Oj::ParseError => e
|
77
84
|
raise ParseError, "Failed to parse dataset files response: #{e.message}"
|
78
85
|
end
|
79
|
-
|
86
|
+
|
80
87
|
def parse_csv_to_json(file_path)
|
81
88
|
raise Error, "File does not exist: #{file_path}" unless File.exist?(file_path)
|
82
89
|
raise Error, "File is not a CSV: #{file_path}" unless csv_file?(file_path)
|
83
|
-
|
90
|
+
|
84
91
|
data = []
|
85
92
|
CSV.foreach(file_path, headers: true) do |row|
|
86
93
|
data << row.to_hash
|
87
94
|
end
|
88
|
-
|
95
|
+
|
89
96
|
data
|
90
97
|
rescue CSV::MalformedCSVError => e
|
91
98
|
raise ParseError, "Failed to parse CSV file: #{e.message}"
|
92
99
|
end
|
93
|
-
|
100
|
+
|
94
101
|
private
|
95
|
-
|
102
|
+
|
96
103
|
def valid_credential?(credential)
|
97
104
|
credential && !credential.to_s.strip.empty?
|
98
105
|
end
|
99
|
-
|
106
|
+
|
100
107
|
def load_credentials(username, api_key, credentials_file)
|
101
108
|
# Try provided credentials file first
|
102
109
|
if credentials_file && File.exist?(credentials_file)
|
@@ -114,63 +121,63 @@ module Kaggle
|
|
114
121
|
@api_key = api_key || ENV['KAGGLE_KEY']
|
115
122
|
end
|
116
123
|
end
|
117
|
-
|
124
|
+
|
118
125
|
def load_credentials_from_file(file_path)
|
119
126
|
content = File.read(file_path)
|
120
127
|
Oj.load(content)
|
121
128
|
rescue Oj::ParseError => e
|
122
129
|
raise AuthenticationError, "Invalid credentials file format: #{e.message}"
|
123
|
-
rescue => e
|
130
|
+
rescue StandardError => e
|
124
131
|
raise AuthenticationError, "Failed to read credentials file: #{e.message}"
|
125
132
|
end
|
126
|
-
|
133
|
+
|
127
134
|
def ensure_directories_exist
|
128
135
|
FileUtils.mkdir_p(@download_path) unless Dir.exist?(@download_path)
|
129
136
|
FileUtils.mkdir_p(@cache_path) unless Dir.exist?(@cache_path)
|
130
137
|
end
|
131
|
-
|
138
|
+
|
132
139
|
def setup_httparty_options
|
133
140
|
self.class.default_options.merge!({
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
end
|
142
|
-
|
141
|
+
headers: Constants::REQUIRED_HEADERS,
|
142
|
+
timeout: @timeout,
|
143
|
+
basic_auth: {
|
144
|
+
username: @username,
|
145
|
+
password: @api_key
|
146
|
+
}
|
147
|
+
})
|
148
|
+
end
|
149
|
+
|
143
150
|
def authenticated_request(method, endpoint, options = {})
|
144
151
|
self.class.send(method, endpoint, options)
|
145
152
|
rescue Timeout::Error, Net::ReadTimeout, Net::OpenTimeout
|
146
153
|
raise Error, 'Request timed out'
|
147
|
-
rescue => e
|
154
|
+
rescue StandardError => e
|
148
155
|
raise Error, "Request failed: #{e.message}"
|
149
156
|
end
|
150
|
-
|
157
|
+
|
151
158
|
def get_extracted_dir(dataset_path)
|
152
159
|
dir_name = dataset_path.gsub('/', '_')
|
153
160
|
File.join(@download_path, dir_name)
|
154
161
|
end
|
155
|
-
|
162
|
+
|
156
163
|
def save_zip_file(dataset_path, content)
|
157
164
|
filename = "#{dataset_path.gsub('/', '_')}.zip"
|
158
165
|
file_path = File.join(@download_path, filename)
|
159
|
-
|
166
|
+
|
160
167
|
File.open(file_path, 'wb') do |file|
|
161
168
|
file.write(content)
|
162
169
|
end
|
163
|
-
|
170
|
+
|
164
171
|
file_path
|
165
172
|
end
|
166
|
-
|
173
|
+
|
167
174
|
def extract_zip_file(zip_file_path, extract_to_dir)
|
168
175
|
FileUtils.mkdir_p(extract_to_dir)
|
169
|
-
|
176
|
+
|
170
177
|
Zip::File.open(zip_file_path) do |zip_file|
|
171
178
|
zip_file.each do |entry|
|
172
179
|
extract_path = File.join(extract_to_dir, entry.name)
|
173
|
-
|
180
|
+
|
174
181
|
if entry.directory?
|
175
182
|
# Create directory
|
176
183
|
FileUtils.mkdir_p(extract_path)
|
@@ -178,7 +185,7 @@ module Kaggle
|
|
178
185
|
# Create parent directory if it doesn't exist
|
179
186
|
parent_dir = File.dirname(extract_path)
|
180
187
|
FileUtils.mkdir_p(parent_dir) unless Dir.exist?(parent_dir)
|
181
|
-
|
188
|
+
|
182
189
|
# Extract file manually to avoid path issues
|
183
190
|
File.open(extract_path, 'wb') do |f|
|
184
191
|
f.write entry.get_input_stream.read
|
@@ -189,16 +196,16 @@ module Kaggle
|
|
189
196
|
rescue Zip::Error => e
|
190
197
|
raise DownloadError, "Failed to extract zip file: #{e.message}"
|
191
198
|
end
|
192
|
-
|
199
|
+
|
193
200
|
def handle_existing_dataset(extracted_dir, options)
|
194
201
|
if options[:parse_csv]
|
195
202
|
csv_files = find_csv_files(extracted_dir)
|
196
203
|
return parse_csv_files_to_json(csv_files) unless csv_files.empty?
|
197
204
|
end
|
198
|
-
|
205
|
+
|
199
206
|
extracted_dir
|
200
207
|
end
|
201
|
-
|
208
|
+
|
202
209
|
def handle_extracted_dataset(extracted_dir, options)
|
203
210
|
if options[:parse_csv]
|
204
211
|
csv_files = find_csv_files(extracted_dir)
|
@@ -207,49 +214,48 @@ module Kaggle
|
|
207
214
|
return parsed_data
|
208
215
|
end
|
209
216
|
end
|
210
|
-
|
217
|
+
|
211
218
|
extracted_dir
|
212
219
|
end
|
213
|
-
|
220
|
+
|
214
221
|
def find_csv_files(directory)
|
215
222
|
Dir.glob(File.join(directory, '**', '*.csv'))
|
216
223
|
end
|
217
|
-
|
224
|
+
|
218
225
|
def parse_csv_files_to_json(csv_files)
|
219
226
|
result = {}
|
220
|
-
|
227
|
+
|
221
228
|
csv_files.each do |csv_file|
|
222
229
|
file_name = File.basename(csv_file, '.csv')
|
223
230
|
result[file_name] = parse_csv_to_json(csv_file)
|
224
231
|
end
|
225
|
-
|
232
|
+
|
226
233
|
# If there's only one CSV file, return its data directly
|
227
234
|
result.length == 1 ? result.values.first : result
|
228
235
|
end
|
229
|
-
|
236
|
+
|
230
237
|
def generate_cache_key(dataset_path)
|
231
238
|
"#{dataset_path.gsub('/', '_')}_parsed.json"
|
232
239
|
end
|
233
|
-
|
240
|
+
|
234
241
|
def cached_file_exists?(cache_key)
|
235
242
|
File.exist?(File.join(@cache_path, cache_key))
|
236
243
|
end
|
237
|
-
|
244
|
+
|
238
245
|
def load_from_cache(cache_key)
|
239
246
|
cache_file_path = File.join(@cache_path, cache_key)
|
240
247
|
Oj.load(File.read(cache_file_path))
|
241
248
|
rescue Oj::ParseError => e
|
242
249
|
raise ParseError, "Failed to parse cached data: #{e.message}"
|
243
250
|
end
|
244
|
-
|
251
|
+
|
245
252
|
def cache_parsed_data(cache_key, data)
|
246
253
|
cache_file_path = File.join(@cache_path, cache_key)
|
247
254
|
File.write(cache_file_path, Oj.dump(data, mode: :compat, indent: 2))
|
248
255
|
end
|
249
|
-
|
256
|
+
|
250
257
|
def csv_file?(file_path)
|
251
258
|
File.extname(file_path).downcase == '.csv'
|
252
259
|
end
|
253
|
-
|
254
260
|
end
|
255
|
-
end
|
261
|
+
end
|
data/lib/kaggle/constants.rb
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
module Kaggle
|
2
2
|
module Constants
|
3
3
|
BASE_URL = 'https://www.kaggle.com/api/v1'
|
4
|
-
|
4
|
+
|
5
5
|
DEFAULT_DOWNLOAD_PATH = './downloads'
|
6
6
|
DEFAULT_CACHE_PATH = './cache'
|
7
7
|
DEFAULT_CREDENTIALS_FILE = './kaggle.json'
|
8
8
|
DEFAULT_TIMEOUT = 30
|
9
|
-
|
9
|
+
|
10
10
|
SUPPORTED_FORMATS = %w[csv json].freeze
|
11
|
-
|
11
|
+
|
12
12
|
DATASET_ENDPOINTS = {
|
13
13
|
view: '/datasets/view',
|
14
14
|
download: '/datasets/download',
|
15
15
|
files: '/datasets/data'
|
16
16
|
}.freeze
|
17
|
-
|
17
|
+
|
18
18
|
REQUIRED_HEADERS = {
|
19
19
|
'User-Agent' => 'Kaggle Ruby Client/0.0.1',
|
20
20
|
'Accept' => 'application/json'
|
21
21
|
}.freeze
|
22
22
|
end
|
23
|
-
end
|
23
|
+
end
|
data/lib/kaggle/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Kaggle
|
2
|
-
VERSION = '0.0.1'
|
3
|
-
end
|
2
|
+
VERSION = '0.0.3'
|
3
|
+
end
|
data/lib/kaggle.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kaggle
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Your Name
|
@@ -10,61 +10,61 @@ cert_chain: []
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
|
-
name: httparty
|
13
|
+
name: csv
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
16
|
- - ">="
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: '0.23'
|
18
|
+
version: '3.3'
|
19
19
|
type: :runtime
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - ">="
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: '0.23'
|
25
|
+
version: '3.3'
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
|
-
name: csv
|
27
|
+
name: fileutils
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
29
29
|
requirements:
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '3.3'
|
32
|
+
version: '1.7'
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
35
|
version_requirements: !ruby/object:Gem::Requirement
|
36
36
|
requirements:
|
37
37
|
- - ">="
|
38
38
|
- !ruby/object:Gem::Version
|
39
|
-
version: '3.3'
|
39
|
+
version: '1.7'
|
40
40
|
- !ruby/object:Gem::Dependency
|
41
|
-
name: oj
|
41
|
+
name: httparty
|
42
42
|
requirement: !ruby/object:Gem::Requirement
|
43
43
|
requirements:
|
44
|
-
- - '='
|
44
|
+
- - ">="
|
45
45
|
- !ruby/object:Gem::Version
|
46
|
-
version: 3.16.11
|
46
|
+
version: '0.23'
|
47
47
|
type: :runtime
|
48
48
|
prerelease: false
|
49
49
|
version_requirements: !ruby/object:Gem::Requirement
|
50
50
|
requirements:
|
51
|
-
- - '='
|
51
|
+
- - ">="
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: 3.16.11
|
53
|
+
version: '0.23'
|
54
54
|
- !ruby/object:Gem::Dependency
|
55
|
-
name: fileutils
|
55
|
+
name: oj
|
56
56
|
requirement: !ruby/object:Gem::Requirement
|
57
57
|
requirements:
|
58
|
-
- - ">="
|
58
|
+
- - '='
|
59
59
|
- !ruby/object:Gem::Version
|
60
|
-
version: '1.7'
|
60
|
+
version: 3.16.11
|
61
61
|
type: :runtime
|
62
62
|
prerelease: false
|
63
63
|
version_requirements: !ruby/object:Gem::Requirement
|
64
64
|
requirements:
|
65
|
-
- - ">="
|
65
|
+
- - '='
|
66
66
|
- !ruby/object:Gem::Version
|
67
|
-
version: '1.7'
|
67
|
+
version: 3.16.11
|
68
68
|
- !ruby/object:Gem::Dependency
|
69
69
|
name: rubyzip
|
70
70
|
requirement: !ruby/object:Gem::Requirement
|
@@ -79,20 +79,6 @@ dependencies:
|
|
79
79
|
- - ">="
|
80
80
|
- !ruby/object:Gem::Version
|
81
81
|
version: '2.0'
|
82
|
-
- !ruby/object:Gem::Dependency
|
83
|
-
name: rake
|
84
|
-
requirement: !ruby/object:Gem::Requirement
|
85
|
-
requirements:
|
86
|
-
- - "~>"
|
87
|
-
- !ruby/object:Gem::Version
|
88
|
-
version: 13.3.0
|
89
|
-
type: :development
|
90
|
-
prerelease: false
|
91
|
-
version_requirements: !ruby/object:Gem::Requirement
|
92
|
-
requirements:
|
93
|
-
- - "~>"
|
94
|
-
- !ruby/object:Gem::Version
|
95
|
-
version: 13.3.0
|
96
82
|
- !ruby/object:Gem::Dependency
|
97
83
|
name: minitest
|
98
84
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,47 +122,47 @@ dependencies:
|
|
136
122
|
- !ruby/object:Gem::Version
|
137
123
|
version: 1.7.1
|
138
124
|
- !ruby/object:Gem::Dependency
|
139
|
-
name: webmock
|
125
|
+
name: mocha
|
140
126
|
requirement: !ruby/object:Gem::Requirement
|
141
127
|
requirements:
|
142
128
|
- - "~>"
|
143
129
|
- !ruby/object:Gem::Version
|
144
|
-
version: 3.24.0
|
130
|
+
version: 2.4.5
|
145
131
|
type: :development
|
146
132
|
prerelease: false
|
147
133
|
version_requirements: !ruby/object:Gem::Requirement
|
148
134
|
requirements:
|
149
135
|
- - "~>"
|
150
136
|
- !ruby/object:Gem::Version
|
151
|
-
version: 3.24.0
|
137
|
+
version: 2.4.5
|
152
138
|
- !ruby/object:Gem::Dependency
|
153
|
-
name: mocha
|
139
|
+
name: pry
|
154
140
|
requirement: !ruby/object:Gem::Requirement
|
155
141
|
requirements:
|
156
142
|
- - "~>"
|
157
143
|
- !ruby/object:Gem::Version
|
158
|
-
version: 2.4.5
|
144
|
+
version: 0.15.2
|
159
145
|
type: :development
|
160
146
|
prerelease: false
|
161
147
|
version_requirements: !ruby/object:Gem::Requirement
|
162
148
|
requirements:
|
163
149
|
- - "~>"
|
164
150
|
- !ruby/object:Gem::Version
|
165
|
-
version: 2.4.5
|
151
|
+
version: 0.15.2
|
166
152
|
- !ruby/object:Gem::Dependency
|
167
|
-
name: pry
|
153
|
+
name: rake
|
168
154
|
requirement: !ruby/object:Gem::Requirement
|
169
155
|
requirements:
|
170
156
|
- - "~>"
|
171
157
|
- !ruby/object:Gem::Version
|
172
|
-
version: 0.15.2
|
158
|
+
version: 13.3.0
|
173
159
|
type: :development
|
174
160
|
prerelease: false
|
175
161
|
version_requirements: !ruby/object:Gem::Requirement
|
176
162
|
requirements:
|
177
163
|
- - "~>"
|
178
164
|
- !ruby/object:Gem::Version
|
179
|
-
version: 0.15.2
|
165
|
+
version: 13.3.0
|
180
166
|
- !ruby/object:Gem::Dependency
|
181
167
|
name: simplecov
|
182
168
|
requirement: !ruby/object:Gem::Requirement
|
@@ -205,6 +191,20 @@ dependencies:
|
|
205
191
|
- - "~>"
|
206
192
|
- !ruby/object:Gem::Version
|
207
193
|
version: 0.9.10
|
194
|
+
- !ruby/object:Gem::Dependency
|
195
|
+
name: webmock
|
196
|
+
requirement: !ruby/object:Gem::Requirement
|
197
|
+
requirements:
|
198
|
+
- - "~>"
|
199
|
+
- !ruby/object:Gem::Version
|
200
|
+
version: 3.24.0
|
201
|
+
type: :development
|
202
|
+
prerelease: false
|
203
|
+
version_requirements: !ruby/object:Gem::Requirement
|
204
|
+
requirements:
|
205
|
+
- - "~>"
|
206
|
+
- !ruby/object:Gem::Version
|
207
|
+
version: 3.24.0
|
208
208
|
description: A Ruby gem for interacting with the Kaggle API, including dataset downloads
|
209
209
|
with caching support
|
210
210
|
email:
|
@@ -228,7 +228,6 @@ files:
|
|
228
228
|
- plans/benchmarks.md
|
229
229
|
- plans/cli_tool.md
|
230
230
|
- plans/initial_prompt.md
|
231
|
-
- plans/lists.md
|
232
231
|
- plans/models.md
|
233
232
|
- plans/roadmap.md
|
234
233
|
homepage: https://github.com/yourusername/kaggle
|
data/plans/lists.md
DELETED
@@ -1,77 +0,0 @@
|
|
1
|
-
# Lists Enhancement Plan
|
2
|
-
|
3
|
-
## Overview
|
4
|
-
Expand the current listing functionality to provide comprehensive discovery and filtering capabilities for Kaggle resources.
|
5
|
-
|
6
|
-
## Current State
|
7
|
-
- Basic dataset listing with search and pagination
|
8
|
-
- Simple dataset file listing
|
9
|
-
|
10
|
-
## Planned Enhancements
|
11
|
-
|
12
|
-
### Phase 1: Enhanced Dataset Lists
|
13
|
-
- [ ] **Advanced Filtering**: Filter by license, file formats, size, update date
|
14
|
-
- [ ] **Sorting Options**: Sort by popularity, date, size, downloads
|
15
|
-
- [ ] **Category Browsing**: Browse datasets by category/topic
|
16
|
-
- [ ] **User/Organization Datasets**: List datasets by specific users or organizations
|
17
|
-
- [ ] **Featured Datasets**: Highlight trending or featured datasets
|
18
|
-
|
19
|
-
### Phase 2: Competition Lists
|
20
|
-
- [ ] **Competition Discovery**: List active, completed, and upcoming competitions
|
21
|
-
- [ ] **Competition Filtering**: Filter by category, prize pool, participant count
|
22
|
-
- [ ] **Competition Search**: Search competitions by title, description, tags
|
23
|
-
- [ ] **Personal Competitions**: List user's participated competitions
|
24
|
-
- [ ] **Competition Metrics**: Show participation stats, deadlines, prizes
|
25
|
-
|
26
|
-
### Phase 3: Model Lists
|
27
|
-
- [ ] **Model Discovery**: List available models and frameworks
|
28
|
-
- [ ] **Model Filtering**: Filter by framework, task type, performance metrics
|
29
|
-
- [ ] **Model Versions**: Track different versions of models
|
30
|
-
- [ ] **Popular Models**: Highlight trending and highly-rated models
|
31
|
-
- [ ] **User Models**: List models by specific users
|
32
|
-
|
33
|
-
### Phase 4: Kernel/Notebook Lists
|
34
|
-
- [ ] **Code Discovery**: List public kernels and notebooks
|
35
|
-
- [ ] **Language Filtering**: Filter by programming language (R, Python, etc.)
|
36
|
-
- [ ] **Topic Browsing**: Browse by dataset or competition
|
37
|
-
- [ ] **Popular Code**: Highlight most-voted and most-forked notebooks
|
38
|
-
- [ ] **Recent Activity**: Show recently updated kernels
|
39
|
-
|
40
|
-
## Technical Implementation
|
41
|
-
|
42
|
-
### API Endpoints
|
43
|
-
- Implement consistent pagination across all list types
|
44
|
-
- Add caching layer for frequently accessed lists
|
45
|
-
- Support bulk operations for multiple list requests
|
46
|
-
|
47
|
-
### CLI Enhancements
|
48
|
-
- Interactive filtering and sorting in CLI
|
49
|
-
- Export capabilities (CSV, JSON, XML)
|
50
|
-
- Bookmarking and favorites functionality
|
51
|
-
- Watchlist for monitoring specific items
|
52
|
-
|
53
|
-
### Data Structures
|
54
|
-
```ruby
|
55
|
-
# Enhanced listing response format
|
56
|
-
{
|
57
|
-
items: [], # List of resources
|
58
|
-
pagination: { # Pagination metadata
|
59
|
-
page: 1,
|
60
|
-
per_page: 20,
|
61
|
-
total_pages: 50,
|
62
|
-
total_count: 1000
|
63
|
-
},
|
64
|
-
filters: { # Applied filters
|
65
|
-
category: 'finance',
|
66
|
-
license: 'cc-by',
|
67
|
-
updated_since: '2023-01-01'
|
68
|
-
},
|
69
|
-
sort: { # Current sorting
|
70
|
-
field: 'popularity',
|
71
|
-
direction: 'desc'
|
72
|
-
}
|
73
|
-
}
|
74
|
-
```
|
75
|
-
|
76
|
-
## Priority: High
|
77
|
-
Target completion: Version 0.3.0
|