kaggle 0.0.1 → 0.0.2
This diff shows the changes between publicly available versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/README.md +0 -19
- data/Rakefile +1 -1
- data/kaggle.gemspec +4 -4
- data/lib/kaggle/client.rb +70 -74
- data/lib/kaggle/constants.rb +5 -5
- data/lib/kaggle/version.rb +2 -2
- data/lib/kaggle.rb +1 -1
- metadata +40 -41
- data/plans/lists.md +0 -77
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9dd4c5fcd2e2e0f7841b00ba3b27d9dc75e898dc77bbc25b9ddbf10aad9f7561
+  data.tar.gz: e44814e2269cf74aafc044e60653c36523fa0868b4c1548da1083ece0866bf90
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9f81008f7591da868c6eea42414b36159aa6d6c4e3be1ffd18f89f85a0e6e2c120310e4c56f71d32bd406d2eaced158d96972ccd653c3da215f6cfb256eac7d2
+  data.tar.gz: f3215a4798e90fa5e3496f2364f5da4e5bad5adf982e470c54118c8a5afea3c9ebdca99251e3ba4bf76a9c8f64dbe90664f91d7bff1d6ecec600708cf12c34ec
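These digests cover the two archives packed inside the published `.gem` file (`metadata.gz` and `data.tar.gz`). As a point of reference, here is a minimal Ruby sketch for checking the new SHA256 of `data.tar.gz` against a locally fetched copy of the gem; the local filename and the use of RubyGems' `Gem::Package::TarReader` are illustrative assumptions, not part of this diff:

```ruby
require 'digest'
require 'rubygems/package'

# Published SHA256 for data.tar.gz, copied from checksums.yaml above.
EXPECTED = 'e44814e2269cf74aafc044e60653c36523fa0868b4c1548da1083ece0866bf90'

# Hypothetical local path; fetch it first with: gem fetch kaggle -v 0.0.2
File.open('kaggle-0.0.2.gem', 'rb') do |gem_file|
  Gem::Package::TarReader.new(gem_file) do |tar|
    tar.each do |entry|
      next unless entry.full_name == 'data.tar.gz'

      actual = Digest::SHA256.hexdigest(entry.read)
      puts(actual == EXPECTED ? 'data.tar.gz digest matches' : "mismatch: #{actual}")
    end
  end
end
```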
data/README.md
CHANGED
@@ -78,19 +78,6 @@ client = Kaggle::Client.new(
 )
 ```
 
-### List Datasets
-
-```ruby
-# List all datasets
-datasets = client.list_datasets
-
-# Search datasets
-datasets = client.list_datasets(search: 'housing')
-
-# Paginate results
-datasets = client.list_datasets(page: 2, page_size: 10)
-```
-
 ### Download Datasets
 
 ```ruby
@@ -131,12 +118,6 @@ data = client.parse_csv_to_json('/path/to/file.csv')
 The gem includes a command-line interface:
 
 ```bash
-# List datasets
-kaggle list
-
-# Search datasets
-kaggle list "housing"
-
 # Download dataset
 kaggle download zillow zecon
 
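With the listing examples removed, the README's remaining examples center on downloading. A small sketch of that documented flow, pieced together from the surviving README snippets and the client code later in this diff (credential values are placeholders; `zillow`/`zecon` is the dataset already used in the README's CLI example):

```ruby
require 'kaggle'

# Credentials can also come from a kaggle.json credentials file or the
# environment (the client falls back to ENV['KAGGLE_KEY'] for the API key).
client = Kaggle::Client.new(
  username: 'your_username',
  api_key: 'your_api_key'
)

# Downloads and extracts the dataset; parse_csv: true returns the CSV rows as
# Ruby hashes, and use_cache: true reuses previously parsed results.
data = client.download_dataset('zillow', 'zecon', parse_csv: true, use_cache: true)
```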
data/Rakefile
CHANGED
data/kaggle.gemspec
CHANGED
@@ -28,19 +28,19 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
   spec.require_paths = ['lib']
 
-  spec.add_dependency 'httparty', '>= 0.23'
   spec.add_dependency 'csv', '>= 3.3'
-  spec.add_dependency 'oj', '3.16.11'
   spec.add_dependency 'fileutils', '>= 1.7'
+  spec.add_dependency 'httparty', '>= 0.23'
+  spec.add_dependency 'oj', '3.16.11'
   spec.add_dependency 'rubyzip', '>= 2.0'
 
-  spec.add_development_dependency 'rake', '~> 13.3.0'
   spec.add_development_dependency 'minitest', '~> 5.25.5'
   spec.add_development_dependency 'minitest-focus', '~> 1.4.0'
   spec.add_development_dependency 'minitest-reporters', '~> 1.7.1'
-  spec.add_development_dependency 'webmock', '~> 3.24.0'
   spec.add_development_dependency 'mocha', '~> 2.4.5'
   spec.add_development_dependency 'pry', '~> 0.15.2'
+  spec.add_development_dependency 'rake', '~> 13.3.0'
   spec.add_development_dependency 'simplecov', '~> 0.22.0'
   spec.add_development_dependency 'timecop', '~> 0.9.10'
+  spec.add_development_dependency 'webmock', '~> 3.24.0'
 end
data/lib/kaggle/client.rb
CHANGED
@@ -1,102 +1,99 @@
 module Kaggle
   class Client
     include HTTParty
-
+
     base_uri Constants::BASE_URL
-
+
     attr_reader :username, :api_key, :download_path, :cache_path, :timeout
-
-    def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil, timeout: nil)
+
+    def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil,
+                   timeout: nil)
       load_credentials(username, api_key, credentials_file)
       @download_path = download_path || Constants::DEFAULT_DOWNLOAD_PATH
       @cache_path = cache_path || Constants::DEFAULT_CACHE_PATH
       @timeout = timeout || Constants::DEFAULT_TIMEOUT
-
-
-
+
+      unless valid_credential?(@username) && valid_credential?(@api_key)
+        raise AuthenticationError,
+              'Username and API key are required'
+      end
+
       ensure_directories_exist
       setup_httparty_options
     end
-
+
     def download_dataset(dataset_owner, dataset_name, options = {})
       dataset_path = "#{dataset_owner}/#{dataset_name}"
-
+
       # Check cache first for parsed data
       if options[:use_cache] && options[:parse_csv]
         cache_key = generate_cache_key(dataset_path)
-        if cached_file_exists?(cache_key)
-          return load_from_cache(cache_key)
-        end
+        return load_from_cache(cache_key) if cached_file_exists?(cache_key)
       end
-
+
       # Check if we already have extracted files for this dataset
       extracted_dir = get_extracted_dir(dataset_path)
       if options[:use_cache] && Dir.exist?(extracted_dir) && !Dir.empty?(extracted_dir)
         return handle_existing_dataset(extracted_dir, options)
       end
-
+
       # Download the zip file
       response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:download]}/#{dataset_path}")
-
-      unless response.success?
-
-      end
-
+
+      raise DownloadError, "Failed to download dataset: #{response.message}" unless response.success?
+
       # Save zip file
       zip_file = save_zip_file(dataset_path, response.body)
-
+
       # Extract zip file
       extract_zip_file(zip_file, extracted_dir)
-
+
       # Clean up zip file
       File.delete(zip_file) if File.exist?(zip_file)
-
+
       # Handle the extracted files
       result = handle_extracted_dataset(extracted_dir, options)
-
+
       # Cache parsed CSV data if requested
       if options[:use_cache] && options[:parse_csv] && (result.is_a?(Hash) || result.is_a?(Array))
         cache_key = generate_cache_key(dataset_path)
         cache_parsed_data(cache_key, result)
       end
-
+
       result
     end
-
-
+
     def dataset_files(dataset_owner, dataset_name)
       dataset_path = "#{dataset_owner}/#{dataset_name}"
       response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:files]}/#{dataset_path}")
-
-      unless response.success?
-
-      end
-
+
+      raise DatasetNotFoundError, "Dataset not found or accessible: #{dataset_path}" unless response.success?
+
       Oj.load(response.body)
     rescue Oj::ParseError => e
       raise ParseError, "Failed to parse dataset files response: #{e.message}"
     end
-
+
     def parse_csv_to_json(file_path)
       raise Error, "File does not exist: #{file_path}" unless File.exist?(file_path)
       raise Error, "File is not a CSV: #{file_path}" unless csv_file?(file_path)
-
+
       data = []
       CSV.foreach(file_path, headers: true) do |row|
         data << row.to_hash
       end
-
+
       data
     rescue CSV::MalformedCSVError => e
       raise ParseError, "Failed to parse CSV file: #{e.message}"
     end
-
+
     private
-
+
     def valid_credential?(credential)
       credential && !credential.to_s.strip.empty?
     end
-
+
     def load_credentials(username, api_key, credentials_file)
       # Try provided credentials file first
       if credentials_file && File.exist?(credentials_file)
@@ -114,63 +111,63 @@ module Kaggle
         @api_key = api_key || ENV['KAGGLE_KEY']
       end
     end
-
+
     def load_credentials_from_file(file_path)
       content = File.read(file_path)
       Oj.load(content)
     rescue Oj::ParseError => e
       raise AuthenticationError, "Invalid credentials file format: #{e.message}"
-    rescue => e
+    rescue StandardError => e
       raise AuthenticationError, "Failed to read credentials file: #{e.message}"
     end
-
+
     def ensure_directories_exist
       FileUtils.mkdir_p(@download_path) unless Dir.exist?(@download_path)
       FileUtils.mkdir_p(@cache_path) unless Dir.exist?(@cache_path)
     end
-
+
     def setup_httparty_options
       self.class.default_options.merge!({
-
-
-
-
-
-
-
-      end
-
+        headers: Constants::REQUIRED_HEADERS,
+        timeout: @timeout,
+        basic_auth: {
+          username: @username,
+          password: @api_key
+        }
+      })
+    end
+
     def authenticated_request(method, endpoint, options = {})
       self.class.send(method, endpoint, options)
     rescue Timeout::Error, Net::ReadTimeout, Net::OpenTimeout
       raise Error, 'Request timed out'
-    rescue => e
+    rescue StandardError => e
       raise Error, "Request failed: #{e.message}"
     end
-
+
     def get_extracted_dir(dataset_path)
       dir_name = dataset_path.gsub('/', '_')
       File.join(@download_path, dir_name)
     end
-
+
     def save_zip_file(dataset_path, content)
       filename = "#{dataset_path.gsub('/', '_')}.zip"
       file_path = File.join(@download_path, filename)
-
+
       File.open(file_path, 'wb') do |file|
         file.write(content)
       end
-
+
       file_path
     end
-
+
     def extract_zip_file(zip_file_path, extract_to_dir)
       FileUtils.mkdir_p(extract_to_dir)
-
+
       Zip::File.open(zip_file_path) do |zip_file|
         zip_file.each do |entry|
           extract_path = File.join(extract_to_dir, entry.name)
-
+
           if entry.directory?
             # Create directory
             FileUtils.mkdir_p(extract_path)
@@ -178,7 +175,7 @@ module Kaggle
             # Create parent directory if it doesn't exist
             parent_dir = File.dirname(extract_path)
             FileUtils.mkdir_p(parent_dir) unless Dir.exist?(parent_dir)
-
+
             # Extract file manually to avoid path issues
             File.open(extract_path, 'wb') do |f|
               f.write entry.get_input_stream.read
@@ -189,16 +186,16 @@ module Kaggle
     rescue Zip::Error => e
       raise DownloadError, "Failed to extract zip file: #{e.message}"
     end
-
+
     def handle_existing_dataset(extracted_dir, options)
       if options[:parse_csv]
         csv_files = find_csv_files(extracted_dir)
         return parse_csv_files_to_json(csv_files) unless csv_files.empty?
       end
-
+
       extracted_dir
     end
-
+
     def handle_extracted_dataset(extracted_dir, options)
       if options[:parse_csv]
         csv_files = find_csv_files(extracted_dir)
@@ -207,49 +204,48 @@ module Kaggle
           return parsed_data
         end
       end
-
+
       extracted_dir
     end
-
+
     def find_csv_files(directory)
       Dir.glob(File.join(directory, '**', '*.csv'))
     end
-
+
     def parse_csv_files_to_json(csv_files)
       result = {}
-
+
       csv_files.each do |csv_file|
         file_name = File.basename(csv_file, '.csv')
         result[file_name] = parse_csv_to_json(csv_file)
       end
-
+
       # If there's only one CSV file, return its data directly
       result.length == 1 ? result.values.first : result
     end
-
+
     def generate_cache_key(dataset_path)
       "#{dataset_path.gsub('/', '_')}_parsed.json"
     end
-
+
     def cached_file_exists?(cache_key)
       File.exist?(File.join(@cache_path, cache_key))
    end
-
+
     def load_from_cache(cache_key)
       cache_file_path = File.join(@cache_path, cache_key)
       Oj.load(File.read(cache_file_path))
     rescue Oj::ParseError => e
       raise ParseError, "Failed to parse cached data: #{e.message}"
     end
-
+
     def cache_parsed_data(cache_key, data)
       cache_file_path = File.join(@cache_path, cache_key)
       File.write(cache_file_path, Oj.dump(data, mode: :compat, indent: 2))
     end
-
+
     def csv_file?(file_path)
       File.extname(file_path).downcase == '.csv'
     end
-
   end
-end
+end
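The net behavioral change above: `initialize` now rejects blank or missing credentials up front, and the previously empty `unless response.success?` branches now raise specific errors. A hedged sketch of what calling code can expect (the error classes are assumed to be defined under the `Kaggle` namespace, which this diff does not show; the dataset names are placeholders):

```ruby
begin
  # 0.0.2 validates credentials at construction time.
  Kaggle::Client.new(username: '', api_key: nil)
rescue Kaggle::AuthenticationError => e
  warn "credentials rejected: #{e.message}" # "Username and API key are required"
end

client = Kaggle::Client.new(username: 'your_username', api_key: 'your_api_key')

begin
  client.download_dataset('some-owner', 'missing-dataset')
rescue Kaggle::DownloadError => e
  # Raised when the download response is not successful.
  warn "download failed: #{e.message}"
end
```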
data/lib/kaggle/constants.rb
CHANGED
@@ -1,23 +1,23 @@
 module Kaggle
   module Constants
     BASE_URL = 'https://www.kaggle.com/api/v1'
-
+
     DEFAULT_DOWNLOAD_PATH = './downloads'
     DEFAULT_CACHE_PATH = './cache'
     DEFAULT_CREDENTIALS_FILE = './kaggle.json'
     DEFAULT_TIMEOUT = 30
-
+
     SUPPORTED_FORMATS = %w[csv json].freeze
-
+
     DATASET_ENDPOINTS = {
       view: '/datasets/view',
       download: '/datasets/download',
       files: '/datasets/data'
     }.freeze
-
+
     REQUIRED_HEADERS = {
       'User-Agent' => 'Kaggle Ruby Client/0.0.1',
       'Accept' => 'application/json'
     }.freeze
   end
-end
+end
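For reference, these constants are what the client's `authenticated_request` builds URLs from. A short illustrative sketch; HTTParty resolves endpoints against `base_uri` internally, so the explicit concatenation below only shows the resulting URL, and `zillow/zecon` is reused from the README example:

```ruby
require 'kaggle'

dataset_path = 'zillow/zecon'

# The client issues GET "#{Constants::DATASET_ENDPOINTS[:download]}/#{dataset_path}"
# against base_uri Constants::BASE_URL, which resolves to:
url = Kaggle::Constants::BASE_URL +
      "#{Kaggle::Constants::DATASET_ENDPOINTS[:download]}/#{dataset_path}"

puts url # => https://www.kaggle.com/api/v1/datasets/download/zillow/zecon
```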
data/lib/kaggle/version.rb
CHANGED
@@ -1,3 +1,3 @@
 module Kaggle
-  VERSION = '0.0.1'
-end
+  VERSION = '0.0.2'
+end
data/lib/kaggle.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: kaggle
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 platform: ruby
 authors:
 - Your Name
@@ -10,61 +10,61 @@ cert_chain: []
 date: 1980-01-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: httparty
+  name: csv
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0.23'
+        version: '3.3'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0.23'
+        version: '3.3'
 - !ruby/object:Gem::Dependency
-  name: csv
+  name: fileutils
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '1.7'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '1.7'
 - !ruby/object:Gem::Dependency
-  name: oj
+  name: httparty
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - '='
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 3.16.11
+        version: '0.23'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - '='
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 3.16.11
+        version: '0.23'
 - !ruby/object:Gem::Dependency
-  name: fileutils
+  name: oj
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - '='
       - !ruby/object:Gem::Version
-        version: '1.7'
+        version: 3.16.11
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - '='
       - !ruby/object:Gem::Version
-        version: '1.7'
+        version: 3.16.11
 - !ruby/object:Gem::Dependency
   name: rubyzip
   requirement: !ruby/object:Gem::Requirement
@@ -79,20 +79,6 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '2.0'
-- !ruby/object:Gem::Dependency
-  name: rake
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 13.3.0
-  type: :development
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 13.3.0
 - !ruby/object:Gem::Dependency
   name: minitest
   requirement: !ruby/object:Gem::Requirement
@@ -136,47 +122,47 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 1.7.1
 - !ruby/object:Gem::Dependency
-  name: webmock
+  name: mocha
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 3.24.0
+        version: 2.4.5
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 3.24.0
+        version: 2.4.5
 - !ruby/object:Gem::Dependency
-  name: mocha
+  name: pry
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 2.4.5
+        version: 0.15.2
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 2.4.5
+        version: 0.15.2
 - !ruby/object:Gem::Dependency
-  name: pry
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.15.2
+        version: 13.3.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.15.2
+        version: 13.3.0
 - !ruby/object:Gem::Dependency
   name: simplecov
   requirement: !ruby/object:Gem::Requirement
@@ -205,6 +191,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.9.10
+- !ruby/object:Gem::Dependency
+  name: webmock
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.24.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 3.24.0
 description: A Ruby gem for interacting with the Kaggle API, including dataset downloads
   with caching support
 email:
@@ -228,7 +228,6 @@ files:
 - plans/benchmarks.md
 - plans/cli_tool.md
 - plans/initial_prompt.md
-- plans/lists.md
 - plans/models.md
 - plans/roadmap.md
 homepage: https://github.com/yourusername/kaggle
data/plans/lists.md
DELETED
@@ -1,77 +0,0 @@
-# Lists Enhancement Plan
-
-## Overview
-Expand the current listing functionality to provide comprehensive discovery and filtering capabilities for Kaggle resources.
-
-## Current State
-- Basic dataset listing with search and pagination
-- Simple dataset file listing
-
-## Planned Enhancements
-
-### Phase 1: Enhanced Dataset Lists
-- [ ] **Advanced Filtering**: Filter by license, file formats, size, update date
-- [ ] **Sorting Options**: Sort by popularity, date, size, downloads
-- [ ] **Category Browsing**: Browse datasets by category/topic
-- [ ] **User/Organization Datasets**: List datasets by specific users or organizations
-- [ ] **Featured Datasets**: Highlight trending or featured datasets
-
-### Phase 2: Competition Lists
-- [ ] **Competition Discovery**: List active, completed, and upcoming competitions
-- [ ] **Competition Filtering**: Filter by category, prize pool, participant count
-- [ ] **Competition Search**: Search competitions by title, description, tags
-- [ ] **Personal Competitions**: List user's participated competitions
-- [ ] **Competition Metrics**: Show participation stats, deadlines, prizes
-
-### Phase 3: Model Lists
-- [ ] **Model Discovery**: List available models and frameworks
-- [ ] **Model Filtering**: Filter by framework, task type, performance metrics
-- [ ] **Model Versions**: Track different versions of models
-- [ ] **Popular Models**: Highlight trending and highly-rated models
-- [ ] **User Models**: List models by specific users
-
-### Phase 4: Kernel/Notebook Lists
-- [ ] **Code Discovery**: List public kernels and notebooks
-- [ ] **Language Filtering**: Filter by programming language (R, Python, etc.)
-- [ ] **Topic Browsing**: Browse by dataset or competition
-- [ ] **Popular Code**: Highlight most-voted and most-forked notebooks
-- [ ] **Recent Activity**: Show recently updated kernels
-
-## Technical Implementation
-
-### API Endpoints
-- Implement consistent pagination across all list types
-- Add caching layer for frequently accessed lists
-- Support bulk operations for multiple list requests
-
-### CLI Enhancements
-- Interactive filtering and sorting in CLI
-- Export capabilities (CSV, JSON, XML)
-- Bookmarking and favorites functionality
-- Watchlist for monitoring specific items
-
-### Data Structures
-```ruby
-# Enhanced listing response format
-{
-  items: [],          # List of resources
-  pagination: {       # Pagination metadata
-    page: 1,
-    per_page: 20,
-    total_pages: 50,
-    total_count: 1000
-  },
-  filters: {          # Applied filters
-    category: 'finance',
-    license: 'cc-by',
-    updated_since: '2023-01-01'
-  },
-  sort: {             # Current sorting
-    field: 'popularity',
-    direction: 'desc'
-  }
-}
-```
-
-## Priority: High
-Target completion: Version 0.3.0