eegdash-0.3.5.dev89-py3-none-any.whl → eegdash-0.3.5.dev92-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of eegdash has been flagged as potentially problematic; further details are linked from the original registry page.

eegdash/__init__.py CHANGED
@@ -7,4 +7,4 @@ __init__mongo_client()
7
7
 
8
8
  __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset"]
9
9
 
10
- __version__ = "0.3.5.dev89"
10
+ __version__ = "0.3.5.dev92"
eegdash/api.py CHANGED
@@ -90,12 +90,16 @@ class EEGDash:
90
90
  ) -> list[Mapping[str, Any]]:
91
91
  """Find records in the MongoDB collection.
92
92
 
93
- This method can be called in two ways:
93
+ This method supports four usage patterns:
94
94
  1. With a pre-built MongoDB query dictionary (positional argument):
95
95
  >>> eegdash.find({"dataset": "ds002718", "subject": {"$in": ["012", "013"]}})
96
96
  2. With user-friendly keyword arguments for simple and multi-value queries:
97
97
  >>> eegdash.find(dataset="ds002718", subject="012")
98
98
  >>> eegdash.find(dataset="ds002718", subject=["012", "013"])
99
+ 3. With an explicit empty query to return all documents:
100
+ >>> eegdash.find({}) # fetches all records (use with care)
101
+ 4. By combining a raw query with kwargs (merged via logical AND):
102
+ >>> eegdash.find({"dataset": "ds002718"}, subject=["012", "013"]) # yields {"$and":[{"dataset":"ds002718"}, {"subject":{"$in":["012","013"]}}]}
99
103
 
100
104
  Parameters
101
105
  ----------
@@ -110,26 +114,34 @@ class EEGDash:
110
114
  list:
111
115
  A list of DB records (string-keyed dictionaries) that match the query.
112
116
 
113
- Raises
114
- ------
115
- ValueError
116
- If both a `query` dictionary and keyword arguments are provided.
117
-
118
117
  """
119
- if query is not None and kwargs:
120
- raise ValueError(
121
- "Provide either a positional 'query' dictionary or keyword arguments, not both."
122
- )
123
-
124
- final_query = {}
125
- if query is not None:
126
- final_query = query
127
- elif kwargs:
128
- final_query = self._build_query_from_kwargs(**kwargs)
118
+ final_query: dict[str, Any] | None = None
119
+
120
+ # Accept explicit empty dict {} to mean "match all"
121
+ raw_query = query if isinstance(query, dict) else None
122
+ kwargs_query = self._build_query_from_kwargs(**kwargs) if kwargs else None
123
+
124
+ # Determine presence, treating {} as a valid raw query
125
+ has_raw = isinstance(raw_query, dict)
126
+ has_kwargs = kwargs_query is not None
127
+
128
+ if has_raw and has_kwargs:
129
+ # Detect conflicting constraints on the same field (e.g., task specified
130
+ # differently in both places) and raise a clear error instead of silently
131
+ # producing an empty result.
132
+ self._raise_if_conflicting_constraints(raw_query, kwargs_query)
133
+ # Merge with logical AND so both constraints apply
134
+ if raw_query: # non-empty dict adds constraints
135
+ final_query = {"$and": [raw_query, kwargs_query]}
136
+ else: # {} adds nothing; use kwargs_query only
137
+ final_query = kwargs_query
138
+ elif has_raw:
139
+ # May be {} meaning match-all, or a non-empty dict
140
+ final_query = raw_query
141
+ elif has_kwargs:
142
+ final_query = kwargs_query
129
143
  else:
130
- # By default, an empty query {} returns all documents.
131
- # This can be dangerous, so we can either allow it or raise an error.
132
- # Let's require an explicit query for safety.
144
+ # Avoid accidental full scans
133
145
  raise ValueError(
134
146
  "find() requires a query dictionary or at least one keyword argument. "
135
147
  "To find all documents, use find({})."
@@ -224,9 +236,12 @@ class EEGDash:
224
236
  return record
225
237
 
226
238
  def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
227
- """Builds and validates a MongoDB query from user-friendly keyword arguments.
239
+ """Build and validate a MongoDB query from user-friendly keyword arguments.
228
240
 
229
- Translates list values into MongoDB's `$in` operator.
241
+ Improvements:
242
+ - Reject None values and empty/whitespace-only strings
243
+ - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
244
+ - Preserve scalars as exact matches
230
245
  """
231
246
  # 1. Validate that all provided keys are allowed for querying
232
247
  unknown_fields = set(kwargs.keys()) - self._ALLOWED_QUERY_FIELDS
@@ -239,19 +254,108 @@ class EEGDash:
239
254
  # 2. Construct the query dictionary
240
255
  query = {}
241
256
  for key, value in kwargs.items():
242
- if isinstance(value, (list, tuple)):
243
- if not value:
257
+ # None is not a valid constraint
258
+ if value is None:
259
+ raise ValueError(
260
+ f"Received None for query parameter '{key}'. Provide a concrete value."
261
+ )
262
+
263
+ # Handle list-like values as multi-constraints
264
+ if isinstance(value, (list, tuple, set)):
265
+ cleaned: list[Any] = []
266
+ for item in value:
267
+ if item is None:
268
+ continue
269
+ if isinstance(item, str):
270
+ item = item.strip()
271
+ if not item:
272
+ continue
273
+ cleaned.append(item)
274
+ # Deduplicate while preserving order
275
+ cleaned = list(dict.fromkeys(cleaned))
276
+ if not cleaned:
244
277
  raise ValueError(
245
278
  f"Received an empty list for query parameter '{key}'. This is not supported."
246
279
  )
247
- # If the value is a list, use the `$in` operator for multi-search
248
- query[key] = {"$in": value}
280
+ query[key] = {"$in": cleaned}
249
281
  else:
250
- # Otherwise, it's a direct match
282
+ # Scalars: trim strings and validate
283
+ if isinstance(value, str):
284
+ value = value.strip()
285
+ if not value:
286
+ raise ValueError(
287
+ f"Received an empty string for query parameter '{key}'."
288
+ )
251
289
  query[key] = value
252
290
 
253
291
  return query
254
292
 
293
+ # --- Query merging and conflict detection helpers ---
294
+ def _extract_simple_constraint(self, query: dict[str, Any], key: str):
295
+ """Extract a simple constraint for a given key from a query dict.
296
+
297
+ Supports only top-level equality (key: value) and $in (key: {"$in": [...]})
298
+ constraints. Returns a tuple (kind, value) where kind is "eq" or "in". If the
299
+ key is not present or uses other operators, returns None.
300
+ """
301
+ if not isinstance(query, dict) or key not in query:
302
+ return None
303
+ val = query[key]
304
+ if isinstance(val, dict):
305
+ if "$in" in val and isinstance(val["$in"], (list, tuple)):
306
+ return ("in", list(val["$in"]))
307
+ return None # unsupported operator shape for conflict checking
308
+ else:
309
+ return ("eq", val)
310
+
311
+ def _raise_if_conflicting_constraints(
312
+ self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
313
+ ) -> None:
314
+ """Raise ValueError if both query sources define incompatible constraints.
315
+
316
+ We conservatively check only top-level fields with simple equality or $in
317
+ constraints. If a field appears in both queries and constraints are mutually
318
+ exclusive, raise an explicit error to avoid silent empty result sets.
319
+ """
320
+ if not raw_query or not kwargs_query:
321
+ return
322
+
323
+ # Only consider fields we generally allow; skip meta operators like $and
324
+ raw_keys = set(raw_query.keys()) & self._ALLOWED_QUERY_FIELDS
325
+ kw_keys = set(kwargs_query.keys()) & self._ALLOWED_QUERY_FIELDS
326
+ dup_keys = raw_keys & kw_keys
327
+ for key in dup_keys:
328
+ rc = self._extract_simple_constraint(raw_query, key)
329
+ kc = self._extract_simple_constraint(kwargs_query, key)
330
+ if rc is None or kc is None:
331
+ # If either side is non-simple, skip conflict detection for this key
332
+ continue
333
+
334
+ r_kind, r_val = rc
335
+ k_kind, k_val = kc
336
+
337
+ # Normalize to sets when appropriate for simpler checks
338
+ if r_kind == "eq" and k_kind == "eq":
339
+ if r_val != k_val:
340
+ raise ValueError(
341
+ f"Conflicting constraints for '{key}': query={r_val!r} vs kwargs={k_val!r}"
342
+ )
343
+ elif r_kind == "in" and k_kind == "eq":
344
+ if k_val not in r_val:
345
+ raise ValueError(
346
+ f"Conflicting constraints for '{key}': query in {r_val!r} vs kwargs={k_val!r}"
347
+ )
348
+ elif r_kind == "eq" and k_kind == "in":
349
+ if r_val not in k_val:
350
+ raise ValueError(
351
+ f"Conflicting constraints for '{key}': query={r_val!r} vs kwargs in {k_val!r}"
352
+ )
353
+ elif r_kind == "in" and k_kind == "in":
354
+ if len(set(r_val).intersection(k_val)) == 0:
355
+ raise ValueError(
356
+ f"Conflicting constraints for '{key}': disjoint sets {r_val!r} and {k_val!r}"
357
+ )
358
+
255
359
  def load_eeg_data_from_s3(self, s3path: str) -> xr.DataArray:
256
360
  """Load an EEGLAB .set file from an AWS S3 URI and return it as an xarray DataArray.
257
361
 
@@ -676,10 +780,8 @@ class EEGDashDataset(BaseConcatDataset):
676
780
  # If list is provided, let _build_query_from_kwargs turn it into $in later.
677
781
  query_kwargs.setdefault("dataset", dataset)
678
782
 
679
- if query and query_kwargs:
680
- raise ValueError(
681
- "Provide either a 'query' dictionary or keyword arguments for filtering, not both."
682
- )
783
+ # Allow mixing raw DB query with additional keyword filters. Both will be
784
+ # merged by EEGDash.find() (logical AND), so we do not raise here.
683
785
 
684
786
  try:
685
787
  if records is not None:
@@ -723,7 +825,7 @@ class EEGDashDataset(BaseConcatDataset):
723
825
  **base_dataset_kwargs,
724
826
  )
725
827
  )
726
- elif query or query_kwargs:
828
+ elif query is not None or query_kwargs:
727
829
  # This is the DB query path that we are improving
728
830
  datasets = self.find_datasets(
729
831
  query=query,
@@ -786,6 +888,10 @@ class EEGDashDataset(BaseConcatDataset):
786
888
  """
787
889
  datasets: list[EEGDashBaseDataset] = []
788
890
 
891
+ # Build records using either a raw query OR keyword filters, but not both.
892
+ # Note: callers may accidentally pass an empty dict for `query` along with
893
+ # kwargs. In that case, treat it as if no query was provided and rely on kwargs.
894
+ # Always delegate merging of raw query + kwargs to EEGDash.find
789
895
  self.records = self.eeg_dash.find(query, **query_kwargs)
790
896
 
791
897
  for record in self.records:
eegdash/dataset.py CHANGED
@@ -321,7 +321,9 @@ class EEGChallengeDataset(EEGDashDataset):
321
321
  )
322
322
 
323
323
  if self.mini:
324
- if query and "subject" in query:
324
+ # Disallow mixing subject selection with mini=True since mini already
325
+ # applies a predefined subject subset.
326
+ if (query and "subject" in query) or ("subject" in kwargs):
325
327
  raise ValueError(
326
328
  "Query using the parameters `subject` with the class EEGChallengeDataset and `mini==True` is not possible."
327
329
  "Please don't use the `subject` selection twice."
eegdash-0.3.5.dev92.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: eegdash
3
- Version: 0.3.5.dev89
3
+ Version: 0.3.5.dev92
4
4
  Summary: EEG data for machine learning
5
5
  Author-email: Young Truong <dt.young112@gmail.com>, Arnaud Delorme <adelorme@gmail.com>, Aviv Dotan <avivd220@gmail.com>, Oren Shriki <oren70@gmail.com>, Bruno Aristimunha <b.aristimunha@gmail.com>
6
6
  License-Expression: GPL-3.0-only
eegdash-0.3.5.dev92.dist-info/RECORD CHANGED
@@ -1,8 +1,8 @@
1
- eegdash/__init__.py,sha256=YRXApvwCasomxu_F3u7SLZzs2_8Zfk1c4uKZpHZAgO4,240
2
- eegdash/api.py,sha256=wNnyPGiVtETbxZtlCU57LBM1pPaT580VrtqgeX96BbU,33072
1
+ eegdash/__init__.py,sha256=ao48gkXYHPAJnu73RWcBgDLvEE3uNWOScbioW4nbEn4,240
2
+ eegdash/api.py,sha256=yotN4vqurZAxVA4q_DK4z0mhh9P8sbpxKzvyxuRSkcQ,38538
3
3
  eegdash/data_config.py,sha256=OS6ERO-jHrnEOfMJUehY7ieABdsRw_qWzOKJ4pzSfqw,1323
4
4
  eegdash/data_utils.py,sha256=mi9pscui-BPpRH9ovRtGWiSwHG5QN6K_IvJdYaING2I,27679
5
- eegdash/dataset.py,sha256=u2JALkB10fS0Nx0Cou8AYlyn3DyWjqWBTkLgXV458U8,9286
5
+ eegdash/dataset.py,sha256=iGi7m2FNhLgJxxwSsB9JIy01p4tmdlJIPzdL5CDAJU4,9446
6
6
  eegdash/dataset_summary.csv,sha256=EfnPciglkf4Vgc8dDq_1x7Woeeze1II8vOhx60g4yhc,8670
7
7
  eegdash/mongodb.py,sha256=GD3WgA253oFgpzOHrYaj4P1mRjNtDMT5Oj4kVvHswjI,2006
8
8
  eegdash/preprocessing.py,sha256=7S_TTRKPKEk47tTnh2D6WExBt4cctAMxUxGDjJqq5lU,2221
@@ -23,8 +23,8 @@ eegdash/features/feature_bank/dimensionality.py,sha256=j_Ds71Y1AbV2uLFQj8EuXQ4kz
23
23
  eegdash/features/feature_bank/signal.py,sha256=3Tb8z9gX7iZipxQJ9DSyy30JfdmW58kgvimSyZX74p8,3404
24
24
  eegdash/features/feature_bank/spectral.py,sha256=bNB7skusePs1gX7NOU6yRlw_Gr4UOCkO_ylkCgybzug,3319
25
25
  eegdash/features/feature_bank/utils.py,sha256=DGh-Q7-XFIittP7iBBxvsJaZrlVvuY5mw-G7q6C-PCI,1237
26
- eegdash-0.3.5.dev89.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
27
- eegdash-0.3.5.dev89.dist-info/METADATA,sha256=b10KZfP4aGXPQFDfuU5KVq6AQity1MdYd342PlqTp_Y,9925
28
- eegdash-0.3.5.dev89.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
- eegdash-0.3.5.dev89.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
30
- eegdash-0.3.5.dev89.dist-info/RECORD,,
26
+ eegdash-0.3.5.dev92.dist-info/licenses/LICENSE,sha256=asisR-xupy_NrQBFXnx6yqXeZcYWLvbAaiETl25iXT0,931
27
+ eegdash-0.3.5.dev92.dist-info/METADATA,sha256=ThukZMsprgai1PZFYVbURPOmzV3xTMmCPJi-j_sQZps,9925
28
+ eegdash-0.3.5.dev92.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
29
+ eegdash-0.3.5.dev92.dist-info/top_level.txt,sha256=zavO69HQ6MyZM0aQMR2zUS6TAFc7bnN5GEpDpOpFZzU,8
30
+ eegdash-0.3.5.dev92.dist-info/RECORD,,