dataforge-core 1.4.0__tar.gz → 1.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. {dataforge_core-1.4.0/cli/dataforge_core.egg-info → dataforge_core-1.5.0}/PKG-INFO +1 -1
  2. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/pg_deploy.sql +89 -26
  3. {dataforge_core-1.4.0 → dataforge_core-1.5.0/cli/dataforge_core.egg-info}/PKG-INFO +1 -1
  4. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/pyproject.toml +1 -1
  5. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/LICENSE +0 -0
  6. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/MANIFEST.in +0 -0
  7. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/README.md +0 -0
  8. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/__init__.py +0 -0
  9. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/databricks_sql.py +0 -0
  10. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/importProject.py +0 -0
  11. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/main.py +0 -0
  12. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/mainConfig.py +0 -0
  13. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/miniSparky.py +0 -0
  14. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/pg.py +0 -0
  15. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/log4j2.properties +0 -0
  16. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/meta.yaml +0 -0
  17. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/outputs/feature_customer.yaml +0 -0
  18. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/relations.yaml +0 -0
  19. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_customer.yaml +0 -0
  20. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_lineitem.yaml +0 -0
  21. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_nations.yaml +0 -0
  22. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_orders.yaml +0 -0
  23. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_part.yaml +0 -0
  24. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_partsupp.yaml +0 -0
  25. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_region.yaml +0 -0
  26. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/resources/project/sources/tpch_supplier.yaml +0 -0
  27. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge/util.py +0 -0
  28. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge_core.egg-info/SOURCES.txt +0 -0
  29. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge_core.egg-info/dependency_links.txt +0 -0
  30. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge_core.egg-info/entry_points.txt +0 -0
  31. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge_core.egg-info/requires.txt +0 -0
  32. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/cli/dataforge_core.egg-info/top_level.txt +0 -0
  33. {dataforge_core-1.4.0 → dataforge_core-1.5.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-core
3
- Version: 1.4.0
3
+ Version: 1.5.0
4
4
  Summary: Command line compiler for dataforge core projects
5
5
  Author-email: Vadim Orlov <vorlov@dataforgelabs.com>
6
6
  Project-URL: Homepage, https://github.com/dataforgelabs/dataforge-core
@@ -1,4 +1,4 @@
1
- --Built: Fri Aug 8 16:10:46 UTC 2025
1
+ --Built: Wed Jan 21 20:38:48 UTC 2026
2
2
  CREATE SCHEMA IF NOT EXISTS meta;
3
3
  CREATE SCHEMA IF NOT EXISTS log;
4
4
  DO $$
@@ -187,8 +187,6 @@ CREATE TABLE IF NOT EXISTS meta.output_column
187
187
  updated_userid text COLLATE pg_catalog."default",
188
188
  update_datetime timestamp without time zone,
189
189
  description text COLLATE pg_catalog."default",
190
- partition_ordinal integer,
191
- zorder_ordinal integer,
192
190
  CONSTRAINT pk_output_column PRIMARY KEY (output_column_id),
193
191
  CONSTRAINT fk_output_column_output_id FOREIGN KEY (output_id)
194
192
  REFERENCES meta.output (output_id) MATCH SIMPLE
@@ -210,7 +208,6 @@ CREATE TABLE IF NOT EXISTS meta.aggregate (
210
208
  numeric_flag BOOLEAN,
211
209
  data_type TEXT
212
210
  );
213
-
214
211
  TRUNCATE TABLE meta.aggregate;
215
212
  INSERT INTO meta.aggregate(aggregate_name, description, numeric_flag, data_type)
216
213
  VALUES ('any', 'Returns true if at least one value of `expr` is true.', FALSE, 'boolean'),
@@ -293,7 +290,6 @@ CREATE TABLE IF NOT EXISTS meta.enrichment
293
290
  ON DELETE NO ACTION,
294
291
  CONSTRAINT enrichment_rule_type_code_check CHECK (rule_type_code = ANY (ARRAY['V'::bpchar, 'E'::bpchar])),
295
292
  CONSTRAINT enrichment_datatype_fkey FOREIGN KEY (datatype) REFERENCES meta.attribute_type(hive_type),
296
- CONSTRAINT enrichment_cast_datatype_fkey FOREIGN KEY (datatype) REFERENCES meta.attribute_type(hive_type),
297
293
  CONSTRAINT enrichment_parent_enrichment_id_fkey FOREIGN KEY (parent_enrichment_id)
298
294
  REFERENCES meta.enrichment (enrichment_id) ON DELETE CASCADE
299
295
  );
@@ -689,6 +685,17 @@ v_ret := v_ret || jsonb_build_object('source_cardinality',v_decoded[1]) || jsonb
689
685
 
690
686
  RETURN v_ret - 'name' - 'cardinality';
691
687
 
688
+ END;
689
+ $function$;CREATE OR REPLACE FUNCTION meta.imp_map_connection_type(in_body jsonb)
690
+ RETURNS jsonb
691
+ LANGUAGE plpgsql
692
+ AS
693
+ $function$
694
+ DECLARE
695
+ v_connection_type text = in_body->>'connection_type';
696
+ -- Returns in_body with connection_type 'custom' rewritten to 'custom_ingestion' (the || overwrites that key); any other body, including one without the key, is returned unchanged.
697
+ BEGIN
698
+ RETURN CASE v_connection_type WHEN 'custom' THEN in_body || jsonb_build_object('connection_type','custom_ingestion') ELSE in_body END;
692
699
  END;
693
700
  $function$;CREATE OR REPLACE FUNCTION meta.imp_map_relations(in_relation_uids jsonb)
694
701
  RETURNS int[]
@@ -740,7 +747,7 @@ BEGIN
740
747
  SELECT import_object_id, body_text::jsonb body
741
748
  FROM meta.import_object WHERE import_id = in_import_id)
742
749
  UPDATE meta.import_object io
743
- SET body = p.body,
750
+ SET body = CASE WHEN object_type IN ('source') THEN meta.imp_map_connection_type(p.body) ELSE p.body END,
744
751
  name = CASE WHEN object_type IN ('source' ,'output', 'group', 'token') THEN p.body->>(object_type || '_name')
745
752
  WHEN object_type IN ('output_template','source_template') THEN p.body->>'object_name'
746
753
  ELSE p.body->>'name' END
@@ -2277,6 +2284,7 @@ DECLARE
2277
2284
  v_processing_type text;
2278
2285
  v_loop_check text;
2279
2286
  v_upstream_keep_current_flag boolean;
2287
+ v_platform text = meta.u_sys_config('lakehouse-platform');
2280
2288
 
2281
2289
  BEGIN
2282
2290
 
@@ -2293,11 +2301,14 @@ THEN
2293
2301
  RETURN json_build_object('error', 'Please use the description section for any comments.', 'expression', NULL);
2294
2302
  END IF;
2295
2303
 
2304
+
2296
2305
  -- check attribute name syntax
2297
- IF NOT in_enr.attribute_name ~ '^[a-z_]+[a-z0-9_]*$'
2298
- THEN
2299
- v_attribute_name_error := 'Invalid attribute name syntax. Attribute name has to start with lowercase letter or _ It may contain lowercase letters, numbers and _';
2300
- END IF;
2306
+ v_attribute_name_error := CASE
2307
+ WHEN v_platform = 'snowflake' AND NOT in_enr.attribute_name ~ '^[A-Z_]+[A-Z0-9_]*$' THEN
2308
+ 'Invalid attribute name syntax. Attribute name has to start with uppercase letter or _ It may contain uppercase letters, numbers and _'
2309
+ WHEN v_platform = 'databricks' AND NOT in_enr.attribute_name ~ '^[a-z_]+[a-z0-9_]*$' THEN
2310
+ 'Invalid attribute name syntax. Attribute name has to start with lowercase letter or _ It may contain lowercase letters, numbers and _'
2311
+ END;
2301
2312
 
2302
2313
  v_template_match_flag := in_template_check_flag and exists (select 1 from meta.enrichment WHERE source_id = in_enr.source_id AND expression = in_enr.expression AND attribute_name = in_enr.attribute_name AND name = in_enr.name);
2303
2314
 
@@ -2955,7 +2966,11 @@ BEGIN
2955
2966
  END IF;
2956
2967
 
2957
2968
  v_ret_expression := v_ret_expression || CASE WHEN v_aggregates_exist_flag AND v_aggregate_id IS NULL
2958
- THEN 'first_value(' || v_attribute_name || ')' -- wrap non-aggregated parameter into aggregate for data type testing purposes only
2969
+ THEN -- wrap non-aggregated parameter into aggregate for data type testing purposes only
2970
+ CASE meta.u_sys_config('lakehouse-platform')
2971
+ WHEN 'databricks' THEN 'first_value(' || v_attribute_name || ')'
2972
+ WHEN 'snowflake' THEN 'ARRAY_AGG(' || v_attribute_name || ')[0]'
2973
+ END
2959
2974
  ELSE v_attribute_name END;
2960
2975
 
2961
2976
  v_last_end = v_end + 1;
@@ -2964,9 +2979,13 @@ BEGIN
2964
2979
  v_ret_expression := v_ret_expression || substr(in_expression,v_last_end);
2965
2980
 
2966
2981
  IF NULLIF(in_cast_datatype,'') IS NOT NULL THEN
2982
+ IF in_cast_datatype = 'DECIMAL' THEN
2983
+ in_cast_datatype = 'NUMERIC(38,12)';
2984
+ END IF;
2967
2985
  v_ret_expression := format('CAST(%s as %s)',v_ret_expression,in_cast_datatype);
2968
2986
  END IF;
2969
2987
 
2988
+
2970
2989
  RAISE DEBUG 'v_exp_test_select_list: %',v_exp_test_select_list;
2971
2990
 
2972
2991
  IF cardinality(v_exp_test_select_list) > 0 THEN
@@ -3034,7 +3053,11 @@ BEGIN
3034
3053
 
3035
3054
  v_ret_expression := replace(v_ret_expression, format('P<%s>', v_ep.enrichment_parameter_id),
3036
3055
  CASE WHEN v_aggregates_exist_flag AND v_ep.aggregation_id IS NULL
3037
- THEN 'first_value(' || v_attribute_name || ')' -- wrap non-aggregated parameter into aggregate for data type testing purposes only
3056
+ THEN -- wrap non-aggregated parameter into aggregate for data type testing purposes only
3057
+ CASE meta.u_sys_config('lakehouse-platform')
3058
+ WHEN 'databricks' THEN 'first_value(' || v_attribute_name || ')'
3059
+ WHEN 'snowflake' THEN 'ARRAY_AGG(' || v_attribute_name || ')[0]'
3060
+ END
3038
3061
  ELSE v_attribute_name END);
3039
3062
 
3040
3063
  IF in_root_id IS NOT NULL AND v_param.type IN ('raw','enrichment') THEN
@@ -3168,21 +3191,21 @@ BEGIN
3168
3191
  --Recursively get all downstream rules.
3169
3192
  -- Include rules used by relations
3170
3193
  --Return rule chain if enrichment contains downstream self-reference
3194
+ -- Stop check on first duplicate rule in chain
3171
3195
 
3172
3196
 
3173
3197
  WITH RECURSIVE ds AS (
3174
- SELECT enrichment_id, path, 0 level
3175
- FROM meta.u_get_upstream_rules(in_enrichment_id) ur
3198
+ SELECT in_enrichment_id enrichment_id, ARRAY[v_start_node_id] path, 0 level, false loop_flag
3176
3199
  UNION ALL
3177
- SELECT der.enrichment_id, ds.path || der.path, ds.level + 1
3200
+ SELECT der.enrichment_id, ds.path || der.path, ds.level + 1, der.path && ds.path
3178
3201
  FROM ds
3179
3202
  CROSS JOIN LATERAL meta.u_get_upstream_rules(ds.enrichment_id, ds.level + 1) der
3180
- WHERE NOT v_start_node_id = ANY(ds.path)
3203
+ WHERE NOT ds.loop_flag AND ds.level < 10
3181
3204
  )
3182
- SELECT ARRAY['EN' || in_enrichment_id] || ds.path
3205
+ SELECT ds.path
3183
3206
  INTO v_circular_path
3184
3207
  FROM ds
3185
- WHERE enrichment_id = in_enrichment_id;
3208
+ WHERE loop_flag;
3186
3209
 
3187
3210
 
3188
3211
 
@@ -3282,7 +3305,8 @@ ELSEIF in_datatype = 'array' THEN
3282
3305
  ELSEIF in_datatype like 'decimal(%' THEN
3283
3306
  v_exp := format('CAST(`decimal` AS decimal(38,12))',in_datatype);
3284
3307
  ELSE
3285
- v_exp := '`' || in_datatype || '`';
3308
+ v_exp := CASE meta.u_sys_config('lakehouse-platform') WHEN 'databricks' THEN '`' || in_datatype || '`'
3309
+ WHEN 'snowflake' THEN '"' || in_datatype || '"' END;
3286
3310
  END IF;
3287
3311
 
3288
3312
  RETURN v_exp;
@@ -3596,9 +3620,7 @@ v_in_path_length := cardinality(in_start_path);
3596
3620
  IF to_regclass('meta.system_configuration') IS NULL THEN
3597
3621
  v_max_length := greatest(4, v_in_path_length + 2);
3598
3622
  ELSE
3599
- SELECT greatest(sc.value::int, v_in_path_length + 2) INTO v_max_length
3600
- FROM meta.system_configuration sc
3601
- WHERE sc.name = 'max-relation-hops';
3623
+ v_max_length := greatest(meta.u_sys_config('max-relation-hops')::int, v_in_path_length + 2);
3602
3624
  END IF;
3603
3625
 
3604
3626
  SELECT array_agg(r.id) INTO v_missing_relation_ids
@@ -3892,7 +3914,30 @@ END;
3892
3914
 
3893
3915
  $function$;
3894
3916
 
3895
- -- build and return datatype_schema if it's null (legacy pre-8.0 attribute)
3917
+
3918
+ CREATE OR REPLACE FUNCTION meta.u_get_schema_from_ddl_type(in_datatype text)
3919
+ RETURNS jsonb
3920
+ LANGUAGE 'plpgsql'
3921
+ -- Maps a DDL type string to its base type name and returns it as a jsonb string (NULL when no mapping is found).
3922
+ AS $BODY$
3923
+
3924
+ DECLARE
3925
+ v_type text;
3926
+
3927
+ BEGIN
3928
+ -- VARCHAR... / TIMESTAMP... collapse to the bare name; NUMBER... is INTEGER when the scale is 0 (text contains ',0)'), otherwise DECIMAL; any other type is looked up in meta.attribute_type by hive_ddl_type.
3929
+ v_type := CASE WHEN in_datatype LIKE 'VARCHAR%' THEN 'VARCHAR'
3930
+ WHEN in_datatype LIKE 'TIMESTAMP%' THEN 'TIMESTAMP'
3931
+ WHEN in_datatype LIKE 'NUMBER%' THEN CASE WHEN in_datatype ~ ',0\)' THEN 'INTEGER' ELSE 'DECIMAL' END
3932
+ ELSE (SELECT hive_type FROM meta.attribute_type WHERE hive_ddl_type = in_datatype)
3933
+ END;
3934
+
3935
+ RETURN to_jsonb(v_type);
3936
+
3937
+
3938
+ END;
3939
+
3940
+ $BODY$;-- build and return datatype_schema if it's null (legacy pre-8.0 attribute)
3896
3941
  CREATE OR REPLACE FUNCTION meta.u_get_schema_from_type(in_schema jsonb, in_datatype text)
3897
3942
  RETURNS jsonb
3898
3943
  LANGUAGE 'plpgsql'
@@ -3903,11 +3948,14 @@ DECLARE
3903
3948
 
3904
3949
  v_type text;
3905
3950
 
3951
+
3906
3952
  BEGIN
3907
3953
 
3908
3954
  IF in_schema IS NOT NULL THEN
3909
3955
  IF jsonb_typeof(in_schema) = 'string' AND in_schema->>0 like 'decimal%' THEN
3910
3956
  RETURN to_jsonb('decimal(38,12)'::text);
3957
+ ELSEIF jsonb_typeof(in_schema) = 'string' AND in_schema->>0 like 'DECIMAL%' THEN
3958
+ RETURN to_jsonb('DECIMAL(38,12)'::text);
3911
3959
  ELSE
3912
3960
  RETURN in_schema;
3913
3961
  END IF;
@@ -3918,6 +3966,8 @@ IF in_datatype = 'int' THEN
3918
3966
  RETURN to_jsonb('integer'::text);
3919
3967
  ELSEIF in_datatype LIKE 'decimal%' THEN
3920
3968
  RETURN to_jsonb('decimal(38,12)'::text);
3969
+ ELSEIF in_datatype LIKE 'DECIMAL%' THEN
3970
+ RETURN to_jsonb('DECIMAL(38,12)'::text);
3921
3971
  ELSEIF in_datatype IN ('struct','array') THEN
3922
3972
  RETURN null;
3923
3973
  END IF;
@@ -4054,6 +4104,8 @@ IF v_type IN ('integer', 'byte','short') THEN
4054
4104
  v_type := 'int';
4055
4105
  ELSEIF v_type LIKE 'decimal%' THEN
4056
4106
  v_type := 'decimal';
4107
+ ELSEIF v_type LIKE 'DECIMAL%' THEN
4108
+ v_type := 'DECIMAL';
4057
4109
  END IF;
4058
4110
 
4059
4111
  RETURN v_type;
@@ -4075,7 +4127,7 @@ BEGIN
4075
4127
  -- Include rules used by relations
4076
4128
  --Return rule chain if enrichment contains downstream self-reference
4077
4129
 
4078
- IF in_level = 0 AND EXISTS(SELECT 1 FROM information_schema.columns where table_name = '_params' AND column_name = 'type')
4130
+ IF in_level = 1 AND EXISTS(SELECT 1 FROM information_schema.columns where table_name = '_params' AND column_name = 'type')
4079
4131
  THEN
4080
4132
 
4081
4133
  RETURN QUERY (
@@ -4588,7 +4640,7 @@ CREATE TEMP TABLE _aggs_parsed
4588
4640
  THEN
4589
4641
  -- Check for window function
4590
4642
  v_follow_text := substring(v_expression, v_expression_position + 1, length(v_expression));
4591
- v_window_function_flag := v_follow_text ~* '^\s*over\s*\(';
4643
+ v_window_function_flag := v_follow_text ~* '(^|\s*within group\s*\([^\)]+\))\s*over\s*\(';
4592
4644
 
4593
4645
  RAISE DEBUG 'Out of parenthesis. position=% v_window_function_flag=% v_follow_text=%',v_expression_position,v_window_function_flag, v_follow_text;
4594
4646
  v_inner_text := substring(v_expression, v_aggregate_start_position, v_expression_position - v_aggregate_start_position);
@@ -4844,7 +4896,18 @@ END LOOP;
4844
4896
  RETURN v_ret;
4845
4897
  END;
4846
4898
 
4847
- $function$;CREATE OR REPLACE FUNCTION meta.u_validate_expression_parameters(in_enr meta.enrichment)
4899
+ $function$;CREATE OR REPLACE FUNCTION meta.u_sys_config(in_name text)
4900
+ RETURNS text
4901
+ LANGUAGE 'plpgsql'
4902
+ AS $BODY$
4903
+ -- Returns the value of the system setting named in_name. This implementation hard-codes 'lakehouse-platform' to 'databricks'.
4904
+ BEGIN
4905
+ -- The CASE has no ELSE, so every other name yields NULL. NOTE(review): this script also requests 'max-relation-hops' from this function and will get NULL - confirm that is intended.
4906
+ RETURN CASE WHEN in_name = 'lakehouse-platform' THEN 'databricks' END;
4907
+
4908
+ END;
4909
+ $BODY$;
4910
+ CREATE OR REPLACE FUNCTION meta.u_validate_expression_parameters(in_enr meta.enrichment)
4848
4911
  RETURNS text
4849
4912
  LANGUAGE 'plpgsql'
4850
4913
  AS $BODY$
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataforge-core
3
- Version: 1.4.0
3
+ Version: 1.5.0
4
4
  Summary: Command line compiler for dataforge core projects
5
5
  Author-email: Vadim Orlov <vorlov@dataforgelabs.com>
6
6
  Project-URL: Homepage, https://github.com/dataforgelabs/dataforge-core
@@ -3,7 +3,7 @@ requires = ["setuptools"]
3
3
  build-backend = "setuptools.build_meta"
4
4
  [project]
5
5
  name = "dataforge-core"
6
- version = "v1.4.0"
6
+ version = "v1.5.0"
7
7
  authors = [
8
8
  {name="Vadim Orlov", email="vorlov@dataforgelabs.com"}
9
9
  ]
File without changes
File without changes
File without changes