cjm-transcript-source-select 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {cjm_transcript_source_select-0.0.1/cjm_transcript_source_select.egg-info → cjm_transcript_source_select-0.0.2}/PKG-INFO +46 -24
  2. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/README.md +45 -23
  3. cjm_transcript_source_select-0.0.2/cjm_transcript_source_select/__init__.py +1 -0
  4. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/_modidx.py +4 -0
  5. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/components/selection_queue.py +1 -1
  6. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/components/source_browser.py +4 -2
  7. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/routes/core.py +16 -0
  8. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/routes/filtering.py +10 -2
  9. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/routes/queue.py +28 -12
  10. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/services/source_utils.py +44 -13
  11. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2/cjm_transcript_source_select.egg-info}/PKG-INFO +46 -24
  12. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/settings.ini +15 -23
  13. cjm_transcript_source_select-0.0.1/cjm_transcript_source_select/__init__.py +0 -1
  14. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/LICENSE +0 -0
  15. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/MANIFEST.in +0 -0
  16. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/components/__init__.py +0 -0
  17. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/components/helpers.py +0 -0
  18. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/components/local_files.py +0 -0
  19. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/components/preview_panel.py +0 -0
  20. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/components/step_renderer.py +0 -0
  21. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/html_ids.py +0 -0
  22. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/models.py +0 -0
  23. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/routes/__init__.py +0 -0
  24. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/routes/init.py +0 -0
  25. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/routes/local_files.py +0 -0
  26. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/routes/tabs.py +0 -0
  27. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/services/__init__.py +0 -0
  28. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/services/source.py +0 -0
  29. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select/utils.py +0 -0
  30. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select.egg-info/SOURCES.txt +0 -0
  31. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select.egg-info/dependency_links.txt +0 -0
  32. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select.egg-info/entry_points.txt +0 -0
  33. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select.egg-info/not-zip-safe +0 -0
  34. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select.egg-info/requires.txt +0 -0
  35. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/cjm_transcript_source_select.egg-info/top_level.txt +0 -0
  36. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/pyproject.toml +0 -0
  37. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/setup.cfg +0 -0
  38. {cjm_transcript_source_select-0.0.1 → cjm_transcript_source_select-0.0.2}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: cjm-transcript-source-select
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: FastHTML source selection component for transcript decomposition workflows, with federated database browsing, drag-drop ordering, and keyboard navigation.
5
5
  Home-page: https://github.com/cj-mills/cjm-transcript-source-select
6
6
  Author: Christian J. Mills
@@ -105,51 +105,51 @@ graph LR
105
105
  components_local_files --> html_ids
106
106
  components_preview_panel --> html_ids
107
107
  components_selection_queue --> html_ids
108
- components_source_browser --> services_source_utils
109
108
  components_source_browser --> utils
109
+ components_source_browser --> services_source_utils
110
110
  components_source_browser --> html_ids
111
- components_step_renderer --> components_helpers
112
- components_step_renderer --> components_source_browser
113
111
  components_step_renderer --> utils
112
+ components_step_renderer --> components_helpers
114
113
  components_step_renderer --> components_preview_panel
115
- components_step_renderer --> html_ids
116
- components_step_renderer --> components_local_files
117
114
  components_step_renderer --> components_selection_queue
115
+ components_step_renderer --> components_local_files
116
+ components_step_renderer --> components_source_browser
118
117
  components_step_renderer --> models
119
- routes_core --> components_step_renderer
120
- routes_core --> models
121
- routes_core --> components_selection_queue
118
+ components_step_renderer --> html_ids
122
119
  routes_core --> components_source_browser
120
+ routes_core --> components_selection_queue
123
121
  routes_core --> services_source
124
- routes_filtering --> routes_core
125
- routes_filtering --> services_source_utils
126
- routes_filtering --> models
122
+ routes_core --> models
123
+ routes_core --> components_step_renderer
127
124
  routes_filtering --> components_source_browser
125
+ routes_filtering --> services_source_utils
126
+ routes_filtering --> routes_core
128
127
  routes_filtering --> services_source
129
- routes_init --> routes_queue
130
- routes_init --> routes_local_files
131
- routes_init --> models
128
+ routes_filtering --> models
132
129
  routes_init --> routes_filtering
133
- routes_init --> routes_tabs
134
130
  routes_init --> services_source
131
+ routes_init --> models
132
+ routes_init --> routes_queue
133
+ routes_init --> routes_local_files
135
134
  routes_init --> routes_core
135
+ routes_init --> routes_tabs
136
136
  routes_local_files --> components_local_files
137
137
  routes_local_files --> routes_core
138
138
  routes_local_files --> services_source
139
139
  routes_local_files --> models
140
140
  routes_queue --> routes_core
141
- routes_queue --> services_source_utils
141
+ routes_queue --> services_source
142
142
  routes_queue --> models
143
+ routes_queue --> services_source_utils
143
144
  routes_queue --> components_preview_panel
144
- routes_queue --> services_source
145
- routes_tabs --> routes_local_files
146
145
  routes_tabs --> components_local_files
147
- routes_tabs --> services_source_utils
146
+ routes_tabs --> routes_local_files
148
147
  routes_tabs --> routes_core
149
148
  routes_tabs --> components_source_browser
150
- routes_tabs --> models
151
149
  routes_tabs --> components_step_renderer
150
+ routes_tabs --> models
152
151
  routes_tabs --> services_source
152
+ routes_tabs --> services_source_utils
153
153
  ```
154
154
 
155
155
  *50 cross-module dependencies detected*
@@ -186,6 +186,16 @@ def _get_step_state(
186
186
  "Get the selection step state from the workflow state store."
187
187
  ```
188
188
 
189
+ ``` python
190
+ def _check_duplicate_media_path(
191
+ source_service: SourceService, # Source service for lookups
192
+ record_id: str, # Candidate record ID
193
+ provider_id: str, # Candidate provider ID
194
+ selected_sources: List[Dict[str, str]], # Current selections
195
+ ) -> bool: # True if adding would duplicate an audio file
196
+ "Check if adding a source would duplicate an already-selected audio file."
197
+ ```
198
+
189
199
  ``` python
190
200
  def _get_active_source_tab(
191
201
  state_store: WorkflowStateStore, # The workflow state store
@@ -658,6 +668,7 @@ def _handle_selection_remove(
658
668
  request, # FastHTML request object
659
669
  sess, # FastHTML session object
660
670
  record_id: str, # Job ID to remove
671
+ provider_id: str, # Plugin name for the source
661
672
  urls: SelectionUrls, # URL bundle for rendering
662
673
  ): # Queue component with OOB stats, optionally with OOB source list
663
674
  "Remove a source from the selection queue."
@@ -698,7 +709,7 @@ def _handle_selection_select_all(
698
709
  grouping_mode: str, # Current grouping mode: "media_path" or "batch_id"
699
710
  urls: SelectionUrls, # URL bundle for rendering
700
711
  ): # Queue component with OOB stats, optionally with OOB source list
701
- "Select all transcriptions for a given group."
712
+ "Select all transcriptions for a given group, skipping duplicate audio sources."
702
713
  ```
703
714
 
704
715
  ``` python
@@ -1088,6 +1099,7 @@ from cjm_transcript_source_select.services.source_utils import (
1088
1099
  group_transcriptions,
1089
1100
  group_transcriptions_by_audio,
1090
1101
  is_source_selected,
1102
+ get_selected_media_paths,
1091
1103
  filter_transcriptions,
1092
1104
  select_all_in_group,
1093
1105
  toggle_source_selection,
@@ -1133,9 +1145,18 @@ def group_transcriptions_by_audio(
1133
1145
  ``` python
1134
1146
  def is_source_selected(
1135
1147
  record_id: str, # Job ID to check
1148
+ provider_id: str, # Provider ID to check
1136
1149
  selected_sources: List[Dict[str, str]] # List of selected sources
1137
1150
  ) -> bool: # True if source is selected
1138
- "Check if a source is in the selected list."
1151
+ "Check if a source is in the selected list by (record_id, provider_id) pair."
1152
+ ```
1153
+
1154
+ ``` python
1155
+ def get_selected_media_paths(
1156
+ selected_sources: List[Dict[str, str]], # Current selections (record_id, provider_id)
1157
+ all_transcriptions: List[Dict[str, Any]], # All available transcription records
1158
+ ) -> Set[str]: # Media paths already represented in selections
1159
+ "Get the set of media_paths for currently selected sources."
1139
1160
  ```
1140
1161
 
1141
1162
  ``` python
@@ -1152,6 +1173,7 @@ def select_all_in_group(
1152
1173
  group_key: str, # Group key to match against
1153
1174
  grouping_mode: str, # Grouping mode: "media_path" or "batch_id"
1154
1175
  selected_sources: List[Dict[str, str]], # Current selections
1176
+ excluded_media_paths: Optional[Set[str]] = None, # Media paths to skip (already selected)
1155
1177
  ) -> List[Dict[str, str]]: # Updated selections with new items appended
1156
1178
  "Add all transcriptions matching a group key to the selection list, skipping duplicates."
1157
1179
  ```
@@ -1162,7 +1184,7 @@ def toggle_source_selection(
1162
1184
  provider_id: str, # Plugin name for the source
1163
1185
  selected_sources: List[Dict[str, str]], # Current selections
1164
1186
  ) -> List[Dict[str, str]]: # Updated selections
1165
- "Toggle a source in or out of the selection list."
1187
+ "Toggle a source in or out of the selection list by (record_id, provider_id) pair."
1166
1188
  ```
1167
1189
 
1168
1190
  ``` python
@@ -62,51 +62,51 @@ graph LR
62
62
  components_local_files --> html_ids
63
63
  components_preview_panel --> html_ids
64
64
  components_selection_queue --> html_ids
65
- components_source_browser --> services_source_utils
66
65
  components_source_browser --> utils
66
+ components_source_browser --> services_source_utils
67
67
  components_source_browser --> html_ids
68
- components_step_renderer --> components_helpers
69
- components_step_renderer --> components_source_browser
70
68
  components_step_renderer --> utils
69
+ components_step_renderer --> components_helpers
71
70
  components_step_renderer --> components_preview_panel
72
- components_step_renderer --> html_ids
73
- components_step_renderer --> components_local_files
74
71
  components_step_renderer --> components_selection_queue
72
+ components_step_renderer --> components_local_files
73
+ components_step_renderer --> components_source_browser
75
74
  components_step_renderer --> models
76
- routes_core --> components_step_renderer
77
- routes_core --> models
78
- routes_core --> components_selection_queue
75
+ components_step_renderer --> html_ids
79
76
  routes_core --> components_source_browser
77
+ routes_core --> components_selection_queue
80
78
  routes_core --> services_source
81
- routes_filtering --> routes_core
82
- routes_filtering --> services_source_utils
83
- routes_filtering --> models
79
+ routes_core --> models
80
+ routes_core --> components_step_renderer
84
81
  routes_filtering --> components_source_browser
82
+ routes_filtering --> services_source_utils
83
+ routes_filtering --> routes_core
85
84
  routes_filtering --> services_source
86
- routes_init --> routes_queue
87
- routes_init --> routes_local_files
88
- routes_init --> models
85
+ routes_filtering --> models
89
86
  routes_init --> routes_filtering
90
- routes_init --> routes_tabs
91
87
  routes_init --> services_source
88
+ routes_init --> models
89
+ routes_init --> routes_queue
90
+ routes_init --> routes_local_files
92
91
  routes_init --> routes_core
92
+ routes_init --> routes_tabs
93
93
  routes_local_files --> components_local_files
94
94
  routes_local_files --> routes_core
95
95
  routes_local_files --> services_source
96
96
  routes_local_files --> models
97
97
  routes_queue --> routes_core
98
- routes_queue --> services_source_utils
98
+ routes_queue --> services_source
99
99
  routes_queue --> models
100
+ routes_queue --> services_source_utils
100
101
  routes_queue --> components_preview_panel
101
- routes_queue --> services_source
102
- routes_tabs --> routes_local_files
103
102
  routes_tabs --> components_local_files
104
- routes_tabs --> services_source_utils
103
+ routes_tabs --> routes_local_files
105
104
  routes_tabs --> routes_core
106
105
  routes_tabs --> components_source_browser
107
- routes_tabs --> models
108
106
  routes_tabs --> components_step_renderer
107
+ routes_tabs --> models
109
108
  routes_tabs --> services_source
109
+ routes_tabs --> services_source_utils
110
110
  ```
111
111
 
112
112
  *50 cross-module dependencies detected*
@@ -143,6 +143,16 @@ def _get_step_state(
143
143
  "Get the selection step state from the workflow state store."
144
144
  ```
145
145
 
146
+ ``` python
147
+ def _check_duplicate_media_path(
148
+ source_service: SourceService, # Source service for lookups
149
+ record_id: str, # Candidate record ID
150
+ provider_id: str, # Candidate provider ID
151
+ selected_sources: List[Dict[str, str]], # Current selections
152
+ ) -> bool: # True if adding would duplicate an audio file
153
+ "Check if adding a source would duplicate an already-selected audio file."
154
+ ```
155
+
146
156
  ``` python
147
157
  def _get_active_source_tab(
148
158
  state_store: WorkflowStateStore, # The workflow state store
@@ -615,6 +625,7 @@ def _handle_selection_remove(
615
625
  request, # FastHTML request object
616
626
  sess, # FastHTML session object
617
627
  record_id: str, # Job ID to remove
628
+ provider_id: str, # Plugin name for the source
618
629
  urls: SelectionUrls, # URL bundle for rendering
619
630
  ): # Queue component with OOB stats, optionally with OOB source list
620
631
  "Remove a source from the selection queue."
@@ -655,7 +666,7 @@ def _handle_selection_select_all(
655
666
  grouping_mode: str, # Current grouping mode: "media_path" or "batch_id"
656
667
  urls: SelectionUrls, # URL bundle for rendering
657
668
  ): # Queue component with OOB stats, optionally with OOB source list
658
- "Select all transcriptions for a given group."
669
+ "Select all transcriptions for a given group, skipping duplicate audio sources."
659
670
  ```
660
671
 
661
672
  ``` python
@@ -1045,6 +1056,7 @@ from cjm_transcript_source_select.services.source_utils import (
1045
1056
  group_transcriptions,
1046
1057
  group_transcriptions_by_audio,
1047
1058
  is_source_selected,
1059
+ get_selected_media_paths,
1048
1060
  filter_transcriptions,
1049
1061
  select_all_in_group,
1050
1062
  toggle_source_selection,
@@ -1090,9 +1102,18 @@ def group_transcriptions_by_audio(
1090
1102
  ``` python
1091
1103
  def is_source_selected(
1092
1104
  record_id: str, # Job ID to check
1105
+ provider_id: str, # Provider ID to check
1093
1106
  selected_sources: List[Dict[str, str]] # List of selected sources
1094
1107
  ) -> bool: # True if source is selected
1095
- "Check if a source is in the selected list."
1108
+ "Check if a source is in the selected list by (record_id, provider_id) pair."
1109
+ ```
1110
+
1111
+ ``` python
1112
+ def get_selected_media_paths(
1113
+ selected_sources: List[Dict[str, str]], # Current selections (record_id, provider_id)
1114
+ all_transcriptions: List[Dict[str, Any]], # All available transcription records
1115
+ ) -> Set[str]: # Media paths already represented in selections
1116
+ "Get the set of media_paths for currently selected sources."
1096
1117
  ```
1097
1118
 
1098
1119
  ``` python
@@ -1109,6 +1130,7 @@ def select_all_in_group(
1109
1130
  group_key: str, # Group key to match against
1110
1131
  grouping_mode: str, # Grouping mode: "media_path" or "batch_id"
1111
1132
  selected_sources: List[Dict[str, str]], # Current selections
1133
+ excluded_media_paths: Optional[Set[str]] = None, # Media paths to skip (already selected)
1112
1134
  ) -> List[Dict[str, str]]: # Updated selections with new items appended
1113
1135
  "Add all transcriptions matching a group key to the selection list, skipping duplicates."
1114
1136
  ```
@@ -1119,7 +1141,7 @@ def toggle_source_selection(
1119
1141
  provider_id: str, # Plugin name for the source
1120
1142
  selected_sources: List[Dict[str, str]], # Current selections
1121
1143
  ) -> List[Dict[str, str]]: # Updated selections
1122
- "Toggle a source in or out of the selection list."
1144
+ "Toggle a source in or out of the selection list by (record_id, provider_id) pair."
1123
1145
  ```
1124
1146
 
1125
1147
  ``` python
@@ -0,0 +1 @@
1
+ __version__ = "0.0.2"
@@ -79,6 +79,8 @@ d = { 'settings': { 'branch': 'main',
79
79
  'cjm_transcript_source_select/models.py')},
80
80
  'cjm_transcript_source_select.routes.core': { 'cjm_transcript_source_select.routes.core._build_queue_response': ( 'routes/core.html#_build_queue_response',
81
81
  'cjm_transcript_source_select/routes/core.py'),
82
+ 'cjm_transcript_source_select.routes.core._check_duplicate_media_path': ( 'routes/core.html#_check_duplicate_media_path',
83
+ 'cjm_transcript_source_select/routes/core.py'),
82
84
  'cjm_transcript_source_select.routes.core._get_active_source_tab': ( 'routes/core.html#_get_active_source_tab',
83
85
  'cjm_transcript_source_select/routes/core.py'),
84
86
  'cjm_transcript_source_select.routes.core._get_step_state': ( 'routes/core.html#_get_step_state',
@@ -197,6 +199,8 @@ d = { 'settings': { 'branch': 'main',
197
199
  'cjm_transcript_source_select/services/source_utils.py'),
198
200
  'cjm_transcript_source_select.services.source_utils.filter_transcriptions': ( 'services/source_utils.html#filter_transcriptions',
199
201
  'cjm_transcript_source_select/services/source_utils.py'),
202
+ 'cjm_transcript_source_select.services.source_utils.get_selected_media_paths': ( 'services/source_utils.html#get_selected_media_paths',
203
+ 'cjm_transcript_source_select/services/source_utils.py'),
200
204
  'cjm_transcript_source_select.services.source_utils.group_transcriptions': ( 'services/source_utils.html#group_transcriptions',
201
205
  'cjm_transcript_source_select/services/source_utils.py'),
202
206
  'cjm_transcript_source_select.services.source_utils.group_transcriptions_by_audio': ( 'services/source_utils.html#group_transcriptions_by_audio',
@@ -73,7 +73,7 @@ def _render_queue_item(
73
73
  lucide_icon("x", size=4, cls=str(text_dui.base_content.opacity(60))),
74
74
  cls=combine_classes(btn, btn_styles.ghost, btn_sizes.xs, m.l(1)),
75
75
  hx_post=remove_url,
76
- hx_vals=json.dumps({"record_id": record_id}),
76
+ hx_vals=json.dumps({"record_id": record_id, "provider_id": provider_id}),
77
77
  hx_target=SelectionHtmlIds.as_selector(SelectionHtmlIds.QUEUE_CONTAINER),
78
78
  hx_swap="outerHTML",
79
79
  data_action="remove",
@@ -154,7 +154,7 @@ def _render_source_row(
154
154
  hx_vals=json.dumps({"record_id": record_id, "provider_id": provider_id}),
155
155
  hx_target=SelectionHtmlIds.as_selector(SelectionHtmlIds.QUEUE_CONTAINER),
156
156
  hx_swap="outerHTML",
157
- name=f"source_{record_id}"
157
+ name=f"source_{provider_id}_{record_id}"
158
158
  ),
159
159
  cls=str(w(12))
160
160
  ),
@@ -260,7 +260,9 @@ def _render_source_list(
260
260
 
261
261
  # Add record rows
262
262
  for record in records:
263
- is_selected = is_source_selected(record.get("record_id", ""), selected_sources)
263
+ is_selected = is_source_selected(
264
+ record.get("record_id", ""), record.get("provider_id", ""), selected_sources
265
+ )
264
266
  table_rows.append(_render_source_row(
265
267
  record, is_selected, add_url, remove_url, preview_url,
266
268
  is_first=is_first_record
@@ -39,6 +39,22 @@ def _get_step_state(
39
39
  step_states = workflow_state.get("step_states", {})
40
40
  return step_states.get("selection", {})
41
41
 
42
+ def _check_duplicate_media_path(
43
+ source_service: SourceService, # Source service for lookups
44
+ record_id: str, # Candidate record ID
45
+ provider_id: str, # Candidate provider ID
46
+ selected_sources: List[Dict[str, str]], # Current selections
47
+ ) -> bool: # True if adding would duplicate an audio file
48
+ """Check if adding a source would duplicate an already-selected audio file."""
49
+ candidate = source_service.get_transcription_by_id(record_id, provider_id)
50
+ if not candidate or not candidate.media_path:
51
+ return False
52
+ for s in selected_sources:
53
+ existing = source_service.get_transcription_by_id(s["record_id"], s["provider_id"])
54
+ if existing and existing.media_path == candidate.media_path:
55
+ return True
56
+ return False
57
+
42
58
  # %% ../../nbs/routes/core.ipynb #3zll5oy1hsc
43
59
  def _get_active_source_tab(
44
60
  state_store: WorkflowStateStore, # The workflow state store
@@ -14,14 +14,15 @@ from cjm_fasthtml_interactions.core.state_store import get_session_id
14
14
 
15
15
  from ..models import SelectionUrls
16
16
  from cjm_transcript_source_select.routes.core import (
17
- WorkflowStateStore, _get_step_state, _update_step_state, _build_queue_response
17
+ WorkflowStateStore, _get_step_state, _update_step_state, _build_queue_response,
18
+ _check_duplicate_media_path
18
19
  )
19
20
  from cjm_transcript_source_select.components.source_browser import (
20
21
  _render_source_list
21
22
  )
22
23
  from ..services.source import SourceService
23
24
  from cjm_transcript_source_select.services.source_utils import (
24
- filter_transcriptions, toggle_source_selection, reorder_item
25
+ filter_transcriptions, toggle_source_selection, reorder_item, is_source_selected
25
26
  )
26
27
 
27
28
  # %% ../../nbs/routes/filtering.ipynb #c4457084
@@ -100,6 +101,13 @@ def _handle_selection_toggle_focused(
100
101
  step_state = _get_step_state(state_store, workflow_id, session_id)
101
102
  selected_sources = step_state.get("selected_sources", [])
102
103
 
104
+ # Only check for duplicate media_path when adding (not removing)
105
+ if not is_source_selected(record_id, provider_id, selected_sources):
106
+ if _check_duplicate_media_path(source_service, record_id, provider_id, selected_sources):
107
+ return _build_queue_response(
108
+ state_store, workflow_id, source_service, session_id, selected_sources, urls
109
+ )
110
+
103
111
  selected_sources = toggle_source_selection(record_id, provider_id, selected_sources)
104
112
  _update_step_state(state_store, workflow_id, session_id, selected_sources)
105
113
 
@@ -14,14 +14,15 @@ from cjm_fasthtml_interactions.core.state_store import get_session_id
14
14
 
15
15
  from ..models import SelectionUrls
16
16
  from cjm_transcript_source_select.routes.core import (
17
- WorkflowStateStore, _get_step_state, _update_step_state, _build_queue_response
17
+ WorkflowStateStore, _get_step_state, _update_step_state, _build_queue_response,
18
+ _check_duplicate_media_path
18
19
  )
19
20
  from cjm_transcript_source_select.components.preview_panel import (
20
21
  _render_preview_panel
21
22
  )
22
23
  from ..services.source import SourceService
23
24
  from cjm_transcript_source_select.services.source_utils import (
24
- select_all_in_group, reorder_sources
25
+ select_all_in_group, reorder_sources, get_selected_media_paths
25
26
  )
26
27
 
27
28
  # %% ../../nbs/routes/queue.ipynb #a5934339
@@ -40,10 +41,17 @@ def _handle_selection_add(
40
41
  step_state = _get_step_state(state_store, workflow_id, session_id)
41
42
  selected_sources = step_state.get("selected_sources", [])
42
43
 
43
- # Check if already selected
44
- if not any(s.get("record_id") == record_id for s in selected_sources):
45
- selected_sources.append({"record_id": record_id, "provider_id": provider_id})
46
- _update_step_state(state_store, workflow_id, session_id, selected_sources)
44
+ # Check if already selected by (record_id, provider_id) pair
45
+ already_selected = any(
46
+ s.get("record_id") == record_id and s.get("provider_id") == provider_id
47
+ for s in selected_sources
48
+ )
49
+
50
+ if not already_selected:
51
+ # Reject if another source with the same audio file is already queued
52
+ if not _check_duplicate_media_path(source_service, record_id, provider_id, selected_sources):
53
+ selected_sources.append({"record_id": record_id, "provider_id": provider_id})
54
+ _update_step_state(state_store, workflow_id, session_id, selected_sources)
47
55
 
48
56
  return _build_queue_response(state_store, workflow_id, source_service, session_id, selected_sources, urls)
49
57
 
@@ -55,6 +63,7 @@ def _handle_selection_remove(
55
63
  request, # FastHTML request object
56
64
  sess, # FastHTML session object
57
65
  record_id: str, # Job ID to remove
66
+ provider_id: str, # Plugin name for the source
58
67
  urls: SelectionUrls, # URL bundle for rendering
59
68
  ): # Queue component with OOB stats, optionally with OOB source list
60
69
  """Remove a source from the selection queue."""
@@ -62,8 +71,11 @@ def _handle_selection_remove(
62
71
  step_state = _get_step_state(state_store, workflow_id, session_id)
63
72
  selected_sources = step_state.get("selected_sources", [])
64
73
 
65
- # Remove the item
66
- selected_sources = [s for s in selected_sources if s.get("record_id") != record_id]
74
+ # Remove by (record_id, provider_id) pair
75
+ selected_sources = [
76
+ s for s in selected_sources
77
+ if not (s.get("record_id") == record_id and s.get("provider_id") == provider_id)
78
+ ]
67
79
  _update_step_state(state_store, workflow_id, session_id, selected_sources)
68
80
 
69
81
  return _build_queue_response(state_store, workflow_id, source_service, session_id, selected_sources, urls)
@@ -120,13 +132,17 @@ def _handle_selection_select_all(
120
132
  grouping_mode: str, # Current grouping mode: "media_path" or "batch_id"
121
133
  urls: SelectionUrls, # URL bundle for rendering
122
134
  ): # Queue component with OOB stats, optionally with OOB source list
123
- """Select all transcriptions for a given group."""
135
+ """Select all transcriptions for a given group, skipping duplicate audio sources."""
124
136
  session_id = get_session_id(sess)
125
137
  step_state = _get_step_state(state_store, workflow_id, session_id)
126
138
  selected_sources = step_state.get("selected_sources", [])
127
139
 
128
140
  all_transcriptions = source_service.query_transcriptions(limit=500)
129
- selected_sources = select_all_in_group(all_transcriptions, group_key, grouping_mode, selected_sources)
141
+ excluded = get_selected_media_paths(selected_sources, all_transcriptions)
142
+ selected_sources = select_all_in_group(
143
+ all_transcriptions, group_key, grouping_mode, selected_sources,
144
+ excluded_media_paths=excluded,
145
+ )
130
146
 
131
147
  _update_step_state(state_store, workflow_id, session_id, selected_sources=selected_sources)
132
148
 
@@ -180,11 +196,11 @@ def init_queue_router(
180
196
  )
181
197
 
182
198
  @router
183
- def remove(request, sess, record_id: str):
199
+ def remove(request, sess, record_id: str, provider_id: str):
184
200
  """Remove a source from the selection queue."""
185
201
  return _handle_selection_remove(
186
202
  state_store, workflow_id, source_service,
187
- request, sess, record_id, urls=urls,
203
+ request, sess, record_id, provider_id, urls=urls,
188
204
  )
189
205
 
190
206
  @router
@@ -4,11 +4,12 @@
4
4
 
5
5
  # %% auto #0
6
6
  __all__ = ['extract_batch_id', 'extract_model_name', 'group_transcriptions', 'group_transcriptions_by_audio',
7
- 'is_source_selected', 'filter_transcriptions', 'select_all_in_group', 'toggle_source_selection',
8
- 'reorder_item', 'reorder_sources', 'calculate_next_tab', 'check_audio_exists', 'validate_browse_path']
7
+ 'is_source_selected', 'get_selected_media_paths', 'filter_transcriptions', 'select_all_in_group',
8
+ 'toggle_source_selection', 'reorder_item', 'reorder_sources', 'calculate_next_tab', 'check_audio_exists',
9
+ 'validate_browse_path']
9
10
 
10
11
  # %% ../../nbs/services/source_utils.ipynb #su-imports
11
- from typing import Any, List, Dict
12
+ from typing import Any, List, Dict, Optional, Set
12
13
  from pathlib import Path
13
14
  import json
14
15
 
@@ -85,10 +86,27 @@ def group_transcriptions_by_audio(
85
86
  # %% ../../nbs/services/source_utils.ipynb #su-is-source-selected
86
87
  def is_source_selected(
87
88
  record_id: str, # Job ID to check
89
+ provider_id: str, # Provider ID to check
88
90
  selected_sources: List[Dict[str, str]] # List of selected sources
89
91
  ) -> bool: # True if source is selected
90
- """Check if a source is in the selected list."""
91
- return any(s.get("record_id") == record_id for s in selected_sources)
92
+ """Check if a source is in the selected list by (record_id, provider_id) pair."""
93
+ return any(
94
+ s.get("record_id") == record_id and s.get("provider_id") == provider_id
95
+ for s in selected_sources
96
+ )
97
+
98
+ # %% ../../nbs/services/source_utils.ipynb #yt3azuiiy3g
99
+ def get_selected_media_paths(
100
+ selected_sources: List[Dict[str, str]], # Current selections (record_id, provider_id)
101
+ all_transcriptions: List[Dict[str, Any]], # All available transcription records
102
+ ) -> Set[str]: # Media paths already represented in selections
103
+ """Get the set of media_paths for currently selected sources."""
104
+ selected_keys = {(s.get("record_id"), s.get("provider_id")) for s in selected_sources}
105
+ return {
106
+ t.get("media_path") for t in all_transcriptions
107
+ if (t.get("record_id"), t.get("provider_id")) in selected_keys
108
+ and t.get("media_path")
109
+ }
92
110
 
93
111
  # %% ../../nbs/services/source_utils.ipynb #tg25xqgkaa
94
112
  def filter_transcriptions(
@@ -113,6 +131,7 @@ def select_all_in_group(
113
131
  group_key: str, # Group key to match against
114
132
  grouping_mode: str, # Grouping mode: "media_path" or "batch_id"
115
133
  selected_sources: List[Dict[str, str]], # Current selections
134
+ excluded_media_paths: Optional[Set[str]] = None, # Media paths to skip (already selected)
116
135
  ) -> List[Dict[str, str]]: # Updated selections with new items appended
117
136
  """Add all transcriptions matching a group key to the selection list, skipping duplicates."""
118
137
  # Filter transcriptions by group key
@@ -121,14 +140,24 @@ def select_all_in_group(
121
140
  else:
122
141
  matching = [t for t in transcriptions if t.get("media_path") == group_key]
123
142
 
124
- # Deduplicate against existing selections
125
- existing_record_ids = {s.get("record_id") for s in selected_sources}
143
+ # Deduplicate against existing selections using (record_id, provider_id) pairs
144
+ existing_keys = {(s.get("record_id"), s.get("provider_id")) for s in selected_sources}
145
+ used_paths = set(excluded_media_paths) if excluded_media_paths else set()
126
146
  result = list(selected_sources)
127
147
  for t in matching:
128
148
  record_id = t.get("record_id")
129
- if record_id and record_id not in existing_record_ids:
130
- result.append({"record_id": record_id, "provider_id": t.get("provider_id", "")})
131
- existing_record_ids.add(record_id)
149
+ provider_id = t.get("provider_id", "")
150
+ media_path = t.get("media_path")
151
+ key = (record_id, provider_id)
152
+ if not record_id or key in existing_keys:
153
+ continue
154
+ # Skip if media_path already represented
155
+ if excluded_media_paths is not None and media_path and media_path in used_paths:
156
+ continue
157
+ result.append({"record_id": record_id, "provider_id": provider_id})
158
+ existing_keys.add(key)
159
+ if media_path:
160
+ used_paths.add(media_path)
132
161
 
133
162
  return result
134
163
 
@@ -138,9 +167,11 @@ def toggle_source_selection(
138
167
  provider_id: str, # Plugin name for the source
139
168
  selected_sources: List[Dict[str, str]], # Current selections
140
169
  ) -> List[Dict[str, str]]: # Updated selections
141
- """Toggle a source in or out of the selection list."""
142
- if any(s.get("record_id") == record_id for s in selected_sources):
143
- return [s for s in selected_sources if s.get("record_id") != record_id]
170
+ """Toggle a source in or out of the selection list by (record_id, provider_id) pair."""
171
+ if any(s.get("record_id") == record_id and s.get("provider_id") == provider_id
172
+ for s in selected_sources):
173
+ return [s for s in selected_sources
174
+ if not (s.get("record_id") == record_id and s.get("provider_id") == provider_id)]
144
175
  else:
145
176
  return selected_sources + [{"record_id": record_id, "provider_id": provider_id}]
146
177
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: cjm-transcript-source-select
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: FastHTML source selection component for transcript decomposition workflows, with federated database browsing, drag-drop ordering, and keyboard navigation.
5
5
  Home-page: https://github.com/cj-mills/cjm-transcript-source-select
6
6
  Author: Christian J. Mills
@@ -105,51 +105,51 @@ graph LR
105
105
  components_local_files --> html_ids
106
106
  components_preview_panel --> html_ids
107
107
  components_selection_queue --> html_ids
108
- components_source_browser --> services_source_utils
109
108
  components_source_browser --> utils
109
+ components_source_browser --> services_source_utils
110
110
  components_source_browser --> html_ids
111
- components_step_renderer --> components_helpers
112
- components_step_renderer --> components_source_browser
113
111
  components_step_renderer --> utils
112
+ components_step_renderer --> components_helpers
114
113
  components_step_renderer --> components_preview_panel
115
- components_step_renderer --> html_ids
116
- components_step_renderer --> components_local_files
117
114
  components_step_renderer --> components_selection_queue
115
+ components_step_renderer --> components_local_files
116
+ components_step_renderer --> components_source_browser
118
117
  components_step_renderer --> models
119
- routes_core --> components_step_renderer
120
- routes_core --> models
121
- routes_core --> components_selection_queue
118
+ components_step_renderer --> html_ids
122
119
  routes_core --> components_source_browser
120
+ routes_core --> components_selection_queue
123
121
  routes_core --> services_source
124
- routes_filtering --> routes_core
125
- routes_filtering --> services_source_utils
126
- routes_filtering --> models
122
+ routes_core --> models
123
+ routes_core --> components_step_renderer
127
124
  routes_filtering --> components_source_browser
125
+ routes_filtering --> services_source_utils
126
+ routes_filtering --> routes_core
128
127
  routes_filtering --> services_source
129
- routes_init --> routes_queue
130
- routes_init --> routes_local_files
131
- routes_init --> models
128
+ routes_filtering --> models
132
129
  routes_init --> routes_filtering
133
- routes_init --> routes_tabs
134
130
  routes_init --> services_source
131
+ routes_init --> models
132
+ routes_init --> routes_queue
133
+ routes_init --> routes_local_files
135
134
  routes_init --> routes_core
135
+ routes_init --> routes_tabs
136
136
  routes_local_files --> components_local_files
137
137
  routes_local_files --> routes_core
138
138
  routes_local_files --> services_source
139
139
  routes_local_files --> models
140
140
  routes_queue --> routes_core
141
- routes_queue --> services_source_utils
141
+ routes_queue --> services_source
142
142
  routes_queue --> models
143
+ routes_queue --> services_source_utils
143
144
  routes_queue --> components_preview_panel
144
- routes_queue --> services_source
145
- routes_tabs --> routes_local_files
146
145
  routes_tabs --> components_local_files
147
- routes_tabs --> services_source_utils
146
+ routes_tabs --> routes_local_files
148
147
  routes_tabs --> routes_core
149
148
  routes_tabs --> components_source_browser
150
- routes_tabs --> models
151
149
  routes_tabs --> components_step_renderer
150
+ routes_tabs --> models
152
151
  routes_tabs --> services_source
152
+ routes_tabs --> services_source_utils
153
153
  ```
154
154
 
155
155
  *50 cross-module dependencies detected*
@@ -186,6 +186,16 @@ def _get_step_state(
186
186
  "Get the selection step state from the workflow state store."
187
187
  ```
188
188
 
189
+ ``` python
190
+ def _check_duplicate_media_path(
191
+ source_service: SourceService, # Source service for lookups
192
+ record_id: str, # Candidate record ID
193
+ provider_id: str, # Candidate provider ID
194
+ selected_sources: List[Dict[str, str]], # Current selections
195
+ ) -> bool: # True if adding would duplicate an audio file
196
+ "Check if adding a source would duplicate an already-selected audio file."
197
+ ```
198
+
189
199
  ``` python
190
200
  def _get_active_source_tab(
191
201
  state_store: WorkflowStateStore, # The workflow state store
@@ -658,6 +668,7 @@ def _handle_selection_remove(
658
668
  request, # FastHTML request object
659
669
  sess, # FastHTML session object
660
670
  record_id: str, # Job ID to remove
671
+ provider_id: str, # Plugin name for the source
661
672
  urls: SelectionUrls, # URL bundle for rendering
662
673
  ): # Queue component with OOB stats, optionally with OOB source list
663
674
  "Remove a source from the selection queue."
@@ -698,7 +709,7 @@ def _handle_selection_select_all(
698
709
  grouping_mode: str, # Current grouping mode: "media_path" or "batch_id"
699
710
  urls: SelectionUrls, # URL bundle for rendering
700
711
  ): # Queue component with OOB stats, optionally with OOB source list
701
- "Select all transcriptions for a given group."
712
+ "Select all transcriptions for a given group, skipping duplicate audio sources."
702
713
  ```
703
714
 
704
715
  ``` python
@@ -1088,6 +1099,7 @@ from cjm_transcript_source_select.services.source_utils import (
1088
1099
  group_transcriptions,
1089
1100
  group_transcriptions_by_audio,
1090
1101
  is_source_selected,
1102
+ get_selected_media_paths,
1091
1103
  filter_transcriptions,
1092
1104
  select_all_in_group,
1093
1105
  toggle_source_selection,
@@ -1133,9 +1145,18 @@ def group_transcriptions_by_audio(
1133
1145
  ``` python
1134
1146
  def is_source_selected(
1135
1147
  record_id: str, # Job ID to check
1148
+ provider_id: str, # Provider ID to check
1136
1149
  selected_sources: List[Dict[str, str]] # List of selected sources
1137
1150
  ) -> bool: # True if source is selected
1138
- "Check if a source is in the selected list."
1151
+ "Check if a source is in the selected list by (record_id, provider_id) pair."
1152
+ ```
1153
+
1154
+ ``` python
1155
+ def get_selected_media_paths(
1156
+ selected_sources: List[Dict[str, str]], # Current selections (record_id, provider_id)
1157
+ all_transcriptions: List[Dict[str, Any]], # All available transcription records
1158
+ ) -> Set[str]: # Media paths already represented in selections
1159
+ "Get the set of media_paths for currently selected sources."
1139
1160
  ```
1140
1161
 
1141
1162
  ``` python
@@ -1152,6 +1173,7 @@ def select_all_in_group(
1152
1173
  group_key: str, # Group key to match against
1153
1174
  grouping_mode: str, # Grouping mode: "media_path" or "batch_id"
1154
1175
  selected_sources: List[Dict[str, str]], # Current selections
1176
+ excluded_media_paths: Optional[Set[str]] = None, # Media paths to skip (already selected)
1155
1177
  ) -> List[Dict[str, str]]: # Updated selections with new items appended
1156
1178
  "Add all transcriptions matching a group key to the selection list, skipping duplicates."
1157
1179
  ```
@@ -1162,7 +1184,7 @@ def toggle_source_selection(
1162
1184
  provider_id: str, # Plugin name for the source
1163
1185
  selected_sources: List[Dict[str, str]], # Current selections
1164
1186
  ) -> List[Dict[str, str]]: # Updated selections
1165
- "Toggle a source in or out of the selection list."
1187
+ "Toggle a source in or out of the selection list by (record_id, provider_id) pair."
1166
1188
  ```
1167
1189
 
1168
1190
  ``` python
@@ -1,16 +1,10 @@
1
1
  [DEFAULT]
2
- # All sections below are required unless otherwise specified.
3
- # See https://github.com/AnswerDotAI/nbdev/blob/main/settings.ini for examples.
4
-
5
- ### Python library ###
6
2
  repo = cjm-transcript-source-select
7
- lib_name = %(repo)s
8
- version = 0.0.1
3
+ lib_name = cjm-transcript-source-select
4
+ version = 0.0.2
9
5
  min_python = 3.12
10
6
  license = apache2
11
7
  black_formatting = False
12
-
13
- ### nbdev ###
14
8
  doc_path = _docs
15
9
  lib_path = cjm_transcript_source_select
16
10
  nbs_path = nbs
@@ -18,29 +12,27 @@ recursive = True
18
12
  tst_flags = notest
19
13
  put_version_in_init = True
20
14
  update_pyproject = True
21
-
22
- ### Docs ###
23
15
  branch = main
24
16
  custom_sidebar = False
25
- doc_host = https://%(user)s.github.io
26
- doc_baseurl = /%(repo)s
27
- git_url = https://github.com/%(user)s/%(repo)s
28
- title = %(lib_name)s
29
-
30
- ### PyPI ###
17
+ doc_host = https://cj-mills.github.io
18
+ doc_baseurl = /cjm-transcript-source-select
19
+ git_url = https://github.com/cj-mills/cjm-transcript-source-select
20
+ title = cjm-transcript-source-select
31
21
  audience = Developers
32
22
  author = Christian J. Mills
33
23
  author_email = 9126128+cj-mills@users.noreply.github.com
34
- copyright = 2026 onwards, %(author)s
24
+ copyright = 2026 onwards, Christian J. Mills
35
25
  description = FastHTML source selection component for transcript decomposition workflows, with federated database browsing, drag-drop ordering, and keyboard navigation.
36
26
  keywords = nbdev jupyter notebook python
37
27
  language = English
38
28
  status = 3
39
29
  user = cj-mills
30
+ requirements = cjm-plugin-system cjm-transcription-plugin-system cjm-fasthtml-app-core cjm-fasthtml-daisyui cjm_fasthtml_lucide_icons cjm_fasthtml_file_browser cjm_fasthtml_keyboard_navigation duckdb pandas cjm_workflow_state cjm_source_provider cjm_fasthtml_interactions
31
+ readme_nb = index.ipynb
32
+ allowed_metadata_keys =
33
+ allowed_cell_metadata_keys =
34
+ jupyter_hooks = False
35
+ clean_ids = True
36
+ clear_all = False
37
+ skip_procs =
40
38
 
41
- ### Optional ###
42
- requirements = cjm-plugin-system cjm-transcription-plugin-system cjm-fasthtml-app-core cjm-fasthtml-daisyui cjm_fasthtml_lucide_icons cjm_fasthtml_file_browser cjm_fasthtml_keyboard_navigation duckdb pandas cjm_workflow_state cjm_source_provider cjm_fasthtml_interactions
43
- # dev_requirements =
44
- # console_scripts =
45
- # conda_user =
46
- # package_data =
@@ -1 +0,0 @@
1
- __version__ = "0.0.1"