duckrun 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
duckrun/core.py CHANGED
@@ -64,44 +64,44 @@ class Duckrun:
64
64
  def _attach_lakehouse(self):
65
65
  self._create_onelake_secret()
66
66
  try:
67
- # Exclude Iceberg metadata folders when scanning for Delta tables
67
+ # Use expensive list operation but filter for _delta_log folders only
68
+ # This avoids parsing JSON content that causes Iceberg metadata issues
69
+ print(f"Scanning for Delta tables in {self.schema}... (this may take a moment)")
70
+
68
71
  list_tables_query = f"""
69
- SELECT DISTINCT(split_part(file, '_delta_log', 1)) as tables
70
- FROM glob ("abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Tables/*/*/_delta_log/*.json")
71
- WHERE file NOT LIKE '%/metadata/%'
72
+ SELECT DISTINCT
73
+ regexp_extract(file, 'Tables/{self.schema}/([^/]+)/_delta_log', 1) as table_name
74
+ FROM glob("abfss://{self.workspace}@onelake.dfs.fabric.microsoft.com/{self.lakehouse_name}.Lakehouse/Tables/{self.schema}/**")
75
+ WHERE file LIKE '%/_delta_log/%'
76
+ AND file NOT LIKE '%/metadata/%'
72
77
  AND file NOT LIKE '%/iceberg/%'
73
- AND split_part(file, '_delta_log', 1) NOT LIKE '%/metadata'
74
- AND split_part(file, '_delta_log', 1) NOT LIKE '%/iceberg'
78
+ AND regexp_extract(file, 'Tables/{self.schema}/([^/]+)/_delta_log', 1) IS NOT NULL
75
79
  """
80
+
76
81
  list_tables_df = self.con.sql(list_tables_query).df()
77
- list_tables = list_tables_df['tables'].tolist() if not list_tables_df.empty else []
78
-
79
- if not list_tables:
80
- print(f"No Delta tables found in {self.lakehouse_name}.Lakehouse/Tables.")
82
+
83
+ if list_tables_df.empty:
84
+ print(f"No Delta tables found in {self.lakehouse_name}.Lakehouse/Tables/{self.schema}.")
81
85
  return
86
+
87
+ table_names = list_tables_df['table_name'].tolist()
82
88
 
83
- print(f"Found {len(list_tables)} Delta tables. Attaching as views...")
89
+ print(f"Found {len(table_names)} Delta tables. Attaching as views...")
84
90
 
85
- for table_path in list_tables:
86
- parts = table_path.strip("/").split("/")
87
- if len(parts) >= 2:
88
- potential_schema = parts[-2]
89
- table = parts[-1]
90
-
91
- # Skip Iceberg-related folders
92
- if table in ('metadata', 'iceberg') or potential_schema in ('metadata', 'iceberg'):
93
- continue
94
-
95
- if potential_schema == self.schema:
96
- try:
97
- self.con.sql(f"""
98
- CREATE OR REPLACE VIEW {table}
99
- AS SELECT * FROM delta_scan('{self.table_base_url}{self.schema}/{table}');
100
- """)
101
- print(f" ✓ Attached: {table}")
102
- except Exception as e:
103
- print(f" ⚠ Skipped {table}: {str(e)[:100]}")
104
- continue
91
+ for table in table_names:
92
+ # Skip Iceberg-related folders and empty names
93
+ if not table or table in ('metadata', 'iceberg'):
94
+ continue
95
+
96
+ try:
97
+ self.con.sql(f"""
98
+ CREATE OR REPLACE VIEW {table}
99
+ AS SELECT * FROM delta_scan('{self.table_base_url}{self.schema}/{table}');
100
+ """)
101
+ print(f" ✓ Attached: {table}")
102
+ except Exception as e:
103
+ print(f" ⚠ Skipped {table}: {str(e)[:100]}")
104
+ continue
105
105
 
106
106
  print("\nAttached tables (views) in DuckDB:")
107
107
  self.con.sql("SELECT name FROM (SHOW ALL TABLES) WHERE database='memory'").show()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: duckrun
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Lakehouse task runner powered by DuckDB for Microsoft Fabric
5
5
  License-Expression: MIT
6
6
  Project-URL: Homepage, https://github.com/djouallah/duckrun
@@ -0,0 +1,7 @@
1
+ duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
+ duckrun/core.py,sha256=Ok2IS15NcV6zFuFKFi2GOe1NKREoBQzjwAay-fCNf38,13774
3
+ duckrun-0.1.3.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
+ duckrun-0.1.3.dist-info/METADATA,sha256=BYek_gAWR_6QdCAJQAV7QnhoSQsaG0aprlMtAce9Z0k,3805
5
+ duckrun-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
+ duckrun-0.1.3.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
+ duckrun-0.1.3.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- duckrun/__init__.py,sha256=L0jRtD9Ld8Ti4e6GRvPDdHvkQCFAPHM43GSP7ARh6EM,241
2
- duckrun/core.py,sha256=GvOPfrPBkqn4fUxMZLLu7QcWaJifViPlyRrHwKfGQTU,13883
3
- duckrun-0.1.2.dist-info/licenses/LICENSE,sha256=-DeQQwdbCbkB4507ZF3QbocysB-EIjDtaLexvqRkGZc,1083
4
- duckrun-0.1.2.dist-info/METADATA,sha256=kd0HnT4a20kJ4_bBMsUtblrkmiFXF025zmdYmsj2B1s,3805
5
- duckrun-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
6
- duckrun-0.1.2.dist-info/top_level.txt,sha256=BknMEwebbUHrVAp3SC92ps8MPhK7XSYsaogTvi_DmEU,8
7
- duckrun-0.1.2.dist-info/RECORD,,