kaggle 0.0.1 → 0.0.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3ad655835decf29a7a46e8b9c1d62a91bf05975bb8a694e3fac92b9f5f141eb7
4
- data.tar.gz: '0803369672874a9a8f275a53fca15baeb66b1f2a121b2d5bdf91b39349a3bab1'
3
+ metadata.gz: 56c5fd9c27bd8cdc20423167b912f171ee07d37e0413cfbbbcba2ea38140b7c6
4
+ data.tar.gz: 3f37e295dc32362f7606803790225af6a707021ba78b239d4f2a29567102fee6
5
5
  SHA512:
6
- metadata.gz: 947d6474751ade9122c0ec9fcb7d7f533a1b23dc9903ba6f59d3e88f20c4b1ec38ff70c53bb242d3a9cafca7f374bee0b307c2fb74ccc52add31fa7555906864
7
- data.tar.gz: ced4244587280c337dbab8455ffb133db8181631afed0de72bb2b0c773ea7f32b8eff6f30c0d7a6037d5d5ee59cd245e975f7c5f9bdf436da6c9ed9a490707cb
6
+ metadata.gz: 41b60fa4d87d5a78778247acdbf620bbbcb2a23add74b07c76ff25fa2a02ed5675e8fd3aa27dce6352c2c49c0dfcfb90f54d55cbd1219cb81b04c6a2b371d086
7
+ data.tar.gz: c3b887147a1e0aeb38fdf1440b63f3c722e8375feaab45bbc0a5e4e434b0a8f7cb06ea71437fe08d888704c732d60629c7e3f18443c40e339abb72c88a56e667
data/README.md CHANGED
@@ -78,19 +78,6 @@ client = Kaggle::Client.new(
78
78
  )
79
79
  ```
80
80
 
81
- ### List Datasets
82
-
83
- ```ruby
84
- # List all datasets
85
- datasets = client.list_datasets
86
-
87
- # Search datasets
88
- datasets = client.list_datasets(search: 'housing')
89
-
90
- # Paginate results
91
- datasets = client.list_datasets(page: 2, page_size: 10)
92
- ```
93
-
94
81
  ### Download Datasets
95
82
 
96
83
  ```ruby
@@ -131,12 +118,6 @@ data = client.parse_csv_to_json('/path/to/file.csv')
131
118
  The gem includes a command-line interface:
132
119
 
133
120
  ```bash
134
- # List datasets
135
- kaggle list
136
-
137
- # Search datasets
138
- kaggle list "housing"
139
-
140
121
  # Download dataset
141
122
  kaggle download zillow zecon
142
123
 
data/Rakefile CHANGED
@@ -7,4 +7,4 @@ Rake::TestTask.new(:test) do |t|
7
7
  t.test_files = FileList['test/**/*_test.rb']
8
8
  end
9
9
 
10
- task default: :test
10
+ task default: :test
data/kaggle.gemspec CHANGED
@@ -28,19 +28,19 @@ Gem::Specification.new do |spec|
28
28
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
29
29
  spec.require_paths = ['lib']
30
30
 
31
- spec.add_dependency 'httparty', '>= 0.23'
32
31
  spec.add_dependency 'csv', '>= 3.3'
33
- spec.add_dependency 'oj', '3.16.11'
34
32
  spec.add_dependency 'fileutils', '>= 1.7'
33
+ spec.add_dependency 'httparty', '>= 0.23'
34
+ spec.add_dependency 'oj', '3.16.11'
35
35
  spec.add_dependency 'rubyzip', '>= 2.0'
36
36
 
37
- spec.add_development_dependency 'rake', '~> 13.3.0'
38
37
  spec.add_development_dependency 'minitest', '~> 5.25.5'
39
38
  spec.add_development_dependency 'minitest-focus', '~> 1.4.0'
40
39
  spec.add_development_dependency 'minitest-reporters', '~> 1.7.1'
41
- spec.add_development_dependency 'webmock', '~> 3.24.0'
42
40
  spec.add_development_dependency 'mocha', '~> 2.4.5'
43
41
  spec.add_development_dependency 'pry', '~> 0.15.2'
42
+ spec.add_development_dependency 'rake', '~> 13.3.0'
44
43
  spec.add_development_dependency 'simplecov', '~> 0.22.0'
45
44
  spec.add_development_dependency 'timecop', '~> 0.9.10'
45
+ spec.add_development_dependency 'webmock', '~> 3.24.0'
46
46
  end
data/lib/kaggle/client.rb CHANGED
@@ -1,102 +1,109 @@
1
1
  module Kaggle
2
2
  class Client
3
3
  include HTTParty
4
-
4
+
5
5
  base_uri Constants::BASE_URL
6
-
7
- attr_reader :username, :api_key, :download_path, :cache_path, :timeout
8
-
9
- def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil, timeout: nil)
6
+
7
+ attr_reader :username, :api_key, :download_path, :cache_path, :timeout, :cache_only
8
+
9
+ def initialize(username: nil, api_key: nil, credentials_file: nil, download_path: nil, cache_path: nil,
10
+ timeout: nil, cache_only: false)
10
11
  load_credentials(username, api_key, credentials_file)
11
12
  @download_path = download_path || Constants::DEFAULT_DOWNLOAD_PATH
12
13
  @cache_path = cache_path || Constants::DEFAULT_CACHE_PATH
13
14
  @timeout = timeout || Constants::DEFAULT_TIMEOUT
14
-
15
- raise AuthenticationError, 'Username and API key are required' unless valid_credential?(@username) && valid_credential?(@api_key)
16
-
15
+ @cache_only = cache_only
16
+
17
+ unless cache_only || (valid_credential?(@username) && valid_credential?(@api_key))
18
+ raise AuthenticationError,
19
+ 'Username and API key are required (or set cache_only: true for cache-only access)'
20
+ end
21
+
17
22
  ensure_directories_exist
18
- setup_httparty_options
23
+ setup_httparty_options unless cache_only
19
24
  end
20
-
25
+
21
26
  def download_dataset(dataset_owner, dataset_name, options = {})
22
27
  dataset_path = "#{dataset_owner}/#{dataset_name}"
23
-
28
+
24
29
  # Check cache first for parsed data
25
30
  if options[:use_cache] && options[:parse_csv]
26
31
  cache_key = generate_cache_key(dataset_path)
27
- if cached_file_exists?(cache_key)
28
- return load_from_cache(cache_key)
29
- end
32
+ return load_from_cache(cache_key) if cached_file_exists?(cache_key)
30
33
  end
31
-
34
+
32
35
  # Check if we already have extracted files for this dataset
33
36
  extracted_dir = get_extracted_dir(dataset_path)
34
37
  if options[:use_cache] && Dir.exist?(extracted_dir) && !Dir.empty?(extracted_dir)
35
38
  return handle_existing_dataset(extracted_dir, options)
36
39
  end
37
-
40
+
41
+ # If cache_only mode and no cached data found, return nil or raise based on force_cache option
42
+ if @cache_only
43
+ if options[:force_cache]
44
+ raise CacheNotFoundError, "Dataset '#{dataset_path}' not found in cache and force_cache is enabled"
45
+ else
46
+ return nil # Gracefully return nil when cache_only but not forced
47
+ end
48
+ end
49
+
38
50
  # Download the zip file
39
51
  response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:download]}/#{dataset_path}")
40
-
41
- unless response.success?
42
- raise DownloadError, "Failed to download dataset: #{response.message}"
43
- end
44
-
52
+
53
+ raise DownloadError, "Failed to download dataset: #{response.message}" unless response.success?
54
+
45
55
  # Save zip file
46
56
  zip_file = save_zip_file(dataset_path, response.body)
47
-
57
+
48
58
  # Extract zip file
49
59
  extract_zip_file(zip_file, extracted_dir)
50
-
60
+
51
61
  # Clean up zip file
52
62
  File.delete(zip_file) if File.exist?(zip_file)
53
-
63
+
54
64
  # Handle the extracted files
55
65
  result = handle_extracted_dataset(extracted_dir, options)
56
-
66
+
57
67
  # Cache parsed CSV data if requested
58
68
  if options[:use_cache] && options[:parse_csv] && (result.is_a?(Hash) || result.is_a?(Array))
59
69
  cache_key = generate_cache_key(dataset_path)
60
70
  cache_parsed_data(cache_key, result)
61
71
  end
62
-
72
+
63
73
  result
64
74
  end
65
-
66
-
75
+
67
76
  def dataset_files(dataset_owner, dataset_name)
68
77
  dataset_path = "#{dataset_owner}/#{dataset_name}"
69
78
  response = authenticated_request(:get, "#{Constants::DATASET_ENDPOINTS[:files]}/#{dataset_path}")
70
-
71
- unless response.success?
72
- raise DatasetNotFoundError, "Dataset not found or accessible: #{dataset_path}"
73
- end
74
-
79
+
80
+ raise DatasetNotFoundError, "Dataset not found or accessible: #{dataset_path}" unless response.success?
81
+
75
82
  Oj.load(response.body)
76
83
  rescue Oj::ParseError => e
77
84
  raise ParseError, "Failed to parse dataset files response: #{e.message}"
78
85
  end
79
-
86
+
80
87
  def parse_csv_to_json(file_path)
81
88
  raise Error, "File does not exist: #{file_path}" unless File.exist?(file_path)
82
89
  raise Error, "File is not a CSV: #{file_path}" unless csv_file?(file_path)
83
-
90
+
84
91
  data = []
85
92
  CSV.foreach(file_path, headers: true) do |row|
86
93
  data << row.to_hash
87
94
  end
88
-
95
+
89
96
  data
90
97
  rescue CSV::MalformedCSVError => e
91
98
  raise ParseError, "Failed to parse CSV file: #{e.message}"
92
99
  end
93
-
100
+
94
101
  private
95
-
102
+
96
103
  def valid_credential?(credential)
97
104
  credential && !credential.to_s.strip.empty?
98
105
  end
99
-
106
+
100
107
  def load_credentials(username, api_key, credentials_file)
101
108
  # Try provided credentials file first
102
109
  if credentials_file && File.exist?(credentials_file)
@@ -114,63 +121,63 @@ module Kaggle
114
121
  @api_key = api_key || ENV['KAGGLE_KEY']
115
122
  end
116
123
  end
117
-
124
+
118
125
  def load_credentials_from_file(file_path)
119
126
  content = File.read(file_path)
120
127
  Oj.load(content)
121
128
  rescue Oj::ParseError => e
122
129
  raise AuthenticationError, "Invalid credentials file format: #{e.message}"
123
- rescue => e
130
+ rescue StandardError => e
124
131
  raise AuthenticationError, "Failed to read credentials file: #{e.message}"
125
132
  end
126
-
133
+
127
134
  def ensure_directories_exist
128
135
  FileUtils.mkdir_p(@download_path) unless Dir.exist?(@download_path)
129
136
  FileUtils.mkdir_p(@cache_path) unless Dir.exist?(@cache_path)
130
137
  end
131
-
138
+
132
139
  def setup_httparty_options
133
140
  self.class.default_options.merge!({
134
- headers: Constants::REQUIRED_HEADERS,
135
- timeout: @timeout,
136
- basic_auth: {
137
- username: @username,
138
- password: @api_key
139
- }
140
- })
141
- end
142
-
141
+ headers: Constants::REQUIRED_HEADERS,
142
+ timeout: @timeout,
143
+ basic_auth: {
144
+ username: @username,
145
+ password: @api_key
146
+ }
147
+ })
148
+ end
149
+
143
150
  def authenticated_request(method, endpoint, options = {})
144
151
  self.class.send(method, endpoint, options)
145
152
  rescue Timeout::Error, Net::ReadTimeout, Net::OpenTimeout
146
153
  raise Error, 'Request timed out'
147
- rescue => e
154
+ rescue StandardError => e
148
155
  raise Error, "Request failed: #{e.message}"
149
156
  end
150
-
157
+
151
158
  def get_extracted_dir(dataset_path)
152
159
  dir_name = dataset_path.gsub('/', '_')
153
160
  File.join(@download_path, dir_name)
154
161
  end
155
-
162
+
156
163
  def save_zip_file(dataset_path, content)
157
164
  filename = "#{dataset_path.gsub('/', '_')}.zip"
158
165
  file_path = File.join(@download_path, filename)
159
-
166
+
160
167
  File.open(file_path, 'wb') do |file|
161
168
  file.write(content)
162
169
  end
163
-
170
+
164
171
  file_path
165
172
  end
166
-
173
+
167
174
  def extract_zip_file(zip_file_path, extract_to_dir)
168
175
  FileUtils.mkdir_p(extract_to_dir)
169
-
176
+
170
177
  Zip::File.open(zip_file_path) do |zip_file|
171
178
  zip_file.each do |entry|
172
179
  extract_path = File.join(extract_to_dir, entry.name)
173
-
180
+
174
181
  if entry.directory?
175
182
  # Create directory
176
183
  FileUtils.mkdir_p(extract_path)
@@ -178,7 +185,7 @@ module Kaggle
178
185
  # Create parent directory if it doesn't exist
179
186
  parent_dir = File.dirname(extract_path)
180
187
  FileUtils.mkdir_p(parent_dir) unless Dir.exist?(parent_dir)
181
-
188
+
182
189
  # Extract file manually to avoid path issues
183
190
  File.open(extract_path, 'wb') do |f|
184
191
  f.write entry.get_input_stream.read
@@ -189,16 +196,16 @@ module Kaggle
189
196
  rescue Zip::Error => e
190
197
  raise DownloadError, "Failed to extract zip file: #{e.message}"
191
198
  end
192
-
199
+
193
200
  def handle_existing_dataset(extracted_dir, options)
194
201
  if options[:parse_csv]
195
202
  csv_files = find_csv_files(extracted_dir)
196
203
  return parse_csv_files_to_json(csv_files) unless csv_files.empty?
197
204
  end
198
-
205
+
199
206
  extracted_dir
200
207
  end
201
-
208
+
202
209
  def handle_extracted_dataset(extracted_dir, options)
203
210
  if options[:parse_csv]
204
211
  csv_files = find_csv_files(extracted_dir)
@@ -207,49 +214,48 @@ module Kaggle
207
214
  return parsed_data
208
215
  end
209
216
  end
210
-
217
+
211
218
  extracted_dir
212
219
  end
213
-
220
+
214
221
  def find_csv_files(directory)
215
222
  Dir.glob(File.join(directory, '**', '*.csv'))
216
223
  end
217
-
224
+
218
225
  def parse_csv_files_to_json(csv_files)
219
226
  result = {}
220
-
227
+
221
228
  csv_files.each do |csv_file|
222
229
  file_name = File.basename(csv_file, '.csv')
223
230
  result[file_name] = parse_csv_to_json(csv_file)
224
231
  end
225
-
232
+
226
233
  # If there's only one CSV file, return its data directly
227
234
  result.length == 1 ? result.values.first : result
228
235
  end
229
-
236
+
230
237
  def generate_cache_key(dataset_path)
231
238
  "#{dataset_path.gsub('/', '_')}_parsed.json"
232
239
  end
233
-
240
+
234
241
  def cached_file_exists?(cache_key)
235
242
  File.exist?(File.join(@cache_path, cache_key))
236
243
  end
237
-
244
+
238
245
  def load_from_cache(cache_key)
239
246
  cache_file_path = File.join(@cache_path, cache_key)
240
247
  Oj.load(File.read(cache_file_path))
241
248
  rescue Oj::ParseError => e
242
249
  raise ParseError, "Failed to parse cached data: #{e.message}"
243
250
  end
244
-
251
+
245
252
  def cache_parsed_data(cache_key, data)
246
253
  cache_file_path = File.join(@cache_path, cache_key)
247
254
  File.write(cache_file_path, Oj.dump(data, mode: :compat, indent: 2))
248
255
  end
249
-
256
+
250
257
  def csv_file?(file_path)
251
258
  File.extname(file_path).downcase == '.csv'
252
259
  end
253
-
254
260
  end
255
- end
261
+ end
@@ -1,23 +1,23 @@
1
1
  module Kaggle
2
2
  module Constants
3
3
  BASE_URL = 'https://www.kaggle.com/api/v1'
4
-
4
+
5
5
  DEFAULT_DOWNLOAD_PATH = './downloads'
6
6
  DEFAULT_CACHE_PATH = './cache'
7
7
  DEFAULT_CREDENTIALS_FILE = './kaggle.json'
8
8
  DEFAULT_TIMEOUT = 30
9
-
9
+
10
10
  SUPPORTED_FORMATS = %w[csv json].freeze
11
-
11
+
12
12
  DATASET_ENDPOINTS = {
13
13
  view: '/datasets/view',
14
14
  download: '/datasets/download',
15
15
  files: '/datasets/data'
16
16
  }.freeze
17
-
17
+
18
18
  REQUIRED_HEADERS = {
19
19
  'User-Agent' => 'Kaggle Ruby Client/0.0.1',
20
20
  'Accept' => 'application/json'
21
21
  }.freeze
22
22
  end
23
- end
23
+ end
@@ -1,3 +1,3 @@
1
1
  module Kaggle
2
- VERSION = '0.0.1'
3
- end
2
+ VERSION = '0.0.3'
3
+ end
data/lib/kaggle.rb CHANGED
@@ -16,4 +16,5 @@ module Kaggle
16
16
  class DatasetNotFoundError < Error; end
17
17
  class DownloadError < Error; end
18
18
  class ParseError < Error; end
19
- end
19
+ class CacheNotFoundError < Error; end
20
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kaggle
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Your Name
@@ -10,61 +10,61 @@ cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
- name: httparty
13
+ name: csv
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
16
  - - ">="
17
17
  - !ruby/object:Gem::Version
18
- version: '0.23'
18
+ version: '3.3'
19
19
  type: :runtime
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - ">="
24
24
  - !ruby/object:Gem::Version
25
- version: '0.23'
25
+ version: '3.3'
26
26
  - !ruby/object:Gem::Dependency
27
- name: csv
27
+ name: fileutils
28
28
  requirement: !ruby/object:Gem::Requirement
29
29
  requirements:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
- version: '3.3'
32
+ version: '1.7'
33
33
  type: :runtime
34
34
  prerelease: false
35
35
  version_requirements: !ruby/object:Gem::Requirement
36
36
  requirements:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
- version: '3.3'
39
+ version: '1.7'
40
40
  - !ruby/object:Gem::Dependency
41
- name: oj
41
+ name: httparty
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - '='
44
+ - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 3.16.11
46
+ version: '0.23'
47
47
  type: :runtime
48
48
  prerelease: false
49
49
  version_requirements: !ruby/object:Gem::Requirement
50
50
  requirements:
51
- - - '='
51
+ - - ">="
52
52
  - !ruby/object:Gem::Version
53
- version: 3.16.11
53
+ version: '0.23'
54
54
  - !ruby/object:Gem::Dependency
55
- name: fileutils
55
+ name: oj
56
56
  requirement: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - ">="
58
+ - - '='
59
59
  - !ruby/object:Gem::Version
60
- version: '1.7'
60
+ version: 3.16.11
61
61
  type: :runtime
62
62
  prerelease: false
63
63
  version_requirements: !ruby/object:Gem::Requirement
64
64
  requirements:
65
- - - ">="
65
+ - - '='
66
66
  - !ruby/object:Gem::Version
67
- version: '1.7'
67
+ version: 3.16.11
68
68
  - !ruby/object:Gem::Dependency
69
69
  name: rubyzip
70
70
  requirement: !ruby/object:Gem::Requirement
@@ -79,20 +79,6 @@ dependencies:
79
79
  - - ">="
80
80
  - !ruby/object:Gem::Version
81
81
  version: '2.0'
82
- - !ruby/object:Gem::Dependency
83
- name: rake
84
- requirement: !ruby/object:Gem::Requirement
85
- requirements:
86
- - - "~>"
87
- - !ruby/object:Gem::Version
88
- version: 13.3.0
89
- type: :development
90
- prerelease: false
91
- version_requirements: !ruby/object:Gem::Requirement
92
- requirements:
93
- - - "~>"
94
- - !ruby/object:Gem::Version
95
- version: 13.3.0
96
82
  - !ruby/object:Gem::Dependency
97
83
  name: minitest
98
84
  requirement: !ruby/object:Gem::Requirement
@@ -136,47 +122,47 @@ dependencies:
136
122
  - !ruby/object:Gem::Version
137
123
  version: 1.7.1
138
124
  - !ruby/object:Gem::Dependency
139
- name: webmock
125
+ name: mocha
140
126
  requirement: !ruby/object:Gem::Requirement
141
127
  requirements:
142
128
  - - "~>"
143
129
  - !ruby/object:Gem::Version
144
- version: 3.24.0
130
+ version: 2.4.5
145
131
  type: :development
146
132
  prerelease: false
147
133
  version_requirements: !ruby/object:Gem::Requirement
148
134
  requirements:
149
135
  - - "~>"
150
136
  - !ruby/object:Gem::Version
151
- version: 3.24.0
137
+ version: 2.4.5
152
138
  - !ruby/object:Gem::Dependency
153
- name: mocha
139
+ name: pry
154
140
  requirement: !ruby/object:Gem::Requirement
155
141
  requirements:
156
142
  - - "~>"
157
143
  - !ruby/object:Gem::Version
158
- version: 2.4.5
144
+ version: 0.15.2
159
145
  type: :development
160
146
  prerelease: false
161
147
  version_requirements: !ruby/object:Gem::Requirement
162
148
  requirements:
163
149
  - - "~>"
164
150
  - !ruby/object:Gem::Version
165
- version: 2.4.5
151
+ version: 0.15.2
166
152
  - !ruby/object:Gem::Dependency
167
- name: pry
153
+ name: rake
168
154
  requirement: !ruby/object:Gem::Requirement
169
155
  requirements:
170
156
  - - "~>"
171
157
  - !ruby/object:Gem::Version
172
- version: 0.15.2
158
+ version: 13.3.0
173
159
  type: :development
174
160
  prerelease: false
175
161
  version_requirements: !ruby/object:Gem::Requirement
176
162
  requirements:
177
163
  - - "~>"
178
164
  - !ruby/object:Gem::Version
179
- version: 0.15.2
165
+ version: 13.3.0
180
166
  - !ruby/object:Gem::Dependency
181
167
  name: simplecov
182
168
  requirement: !ruby/object:Gem::Requirement
@@ -205,6 +191,20 @@ dependencies:
205
191
  - - "~>"
206
192
  - !ruby/object:Gem::Version
207
193
  version: 0.9.10
194
+ - !ruby/object:Gem::Dependency
195
+ name: webmock
196
+ requirement: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - "~>"
199
+ - !ruby/object:Gem::Version
200
+ version: 3.24.0
201
+ type: :development
202
+ prerelease: false
203
+ version_requirements: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - "~>"
206
+ - !ruby/object:Gem::Version
207
+ version: 3.24.0
208
208
  description: A Ruby gem for interacting with the Kaggle API, including dataset downloads
209
209
  with caching support
210
210
  email:
@@ -228,7 +228,6 @@ files:
228
228
  - plans/benchmarks.md
229
229
  - plans/cli_tool.md
230
230
  - plans/initial_prompt.md
231
- - plans/lists.md
232
231
  - plans/models.md
233
232
  - plans/roadmap.md
234
233
  homepage: https://github.com/yourusername/kaggle
data/plans/lists.md DELETED
@@ -1,77 +0,0 @@
1
- # Lists Enhancement Plan
2
-
3
- ## Overview
4
- Expand the current listing functionality to provide comprehensive discovery and filtering capabilities for Kaggle resources.
5
-
6
- ## Current State
7
- - Basic dataset listing with search and pagination
8
- - Simple dataset file listing
9
-
10
- ## Planned Enhancements
11
-
12
- ### Phase 1: Enhanced Dataset Lists
13
- - [ ] **Advanced Filtering**: Filter by license, file formats, size, update date
14
- - [ ] **Sorting Options**: Sort by popularity, date, size, downloads
15
- - [ ] **Category Browsing**: Browse datasets by category/topic
16
- - [ ] **User/Organization Datasets**: List datasets by specific users or organizations
17
- - [ ] **Featured Datasets**: Highlight trending or featured datasets
18
-
19
- ### Phase 2: Competition Lists
20
- - [ ] **Competition Discovery**: List active, completed, and upcoming competitions
21
- - [ ] **Competition Filtering**: Filter by category, prize pool, participant count
22
- - [ ] **Competition Search**: Search competitions by title, description, tags
23
- - [ ] **Personal Competitions**: List user's participated competitions
24
- - [ ] **Competition Metrics**: Show participation stats, deadlines, prizes
25
-
26
- ### Phase 3: Model Lists
27
- - [ ] **Model Discovery**: List available models and frameworks
28
- - [ ] **Model Filtering**: Filter by framework, task type, performance metrics
29
- - [ ] **Model Versions**: Track different versions of models
30
- - [ ] **Popular Models**: Highlight trending and highly-rated models
31
- - [ ] **User Models**: List models by specific users
32
-
33
- ### Phase 4: Kernel/Notebook Lists
34
- - [ ] **Code Discovery**: List public kernels and notebooks
35
- - [ ] **Language Filtering**: Filter by programming language (R, Python, etc.)
36
- - [ ] **Topic Browsing**: Browse by dataset or competition
37
- - [ ] **Popular Code**: Highlight most-voted and most-forked notebooks
38
- - [ ] **Recent Activity**: Show recently updated kernels
39
-
40
- ## Technical Implementation
41
-
42
- ### API Endpoints
43
- - Implement consistent pagination across all list types
44
- - Add caching layer for frequently accessed lists
45
- - Support bulk operations for multiple list requests
46
-
47
- ### CLI Enhancements
48
- - Interactive filtering and sorting in CLI
49
- - Export capabilities (CSV, JSON, XML)
50
- - Bookmarking and favorites functionality
51
- - Watchlist for monitoring specific items
52
-
53
- ### Data Structures
54
- ```ruby
55
- # Enhanced listing response format
56
- {
57
- items: [], # List of resources
58
- pagination: { # Pagination metadata
59
- page: 1,
60
- per_page: 20,
61
- total_pages: 50,
62
- total_count: 1000
63
- },
64
- filters: { # Applied filters
65
- category: 'finance',
66
- license: 'cc-by',
67
- updated_since: '2023-01-01'
68
- },
69
- sort: { # Current sorting
70
- field: 'popularity',
71
- direction: 'desc'
72
- }
73
- }
74
- ```
75
-
76
- ## Priority: High
77
- Target completion: Version 0.3.0