recce-nightly 1.2.0.20250506__py3-none-any.whl → 1.26.0.20251124__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recce-nightly might be problematic. (The original page linked to more details here; the link target was not preserved in this text.)

Files changed (213)
  1. recce/VERSION +1 -1
  2. recce/__init__.py +27 -22
  3. recce/adapter/base.py +11 -14
  4. recce/adapter/dbt_adapter/__init__.py +810 -480
  5. recce/adapter/dbt_adapter/dbt_version.py +3 -0
  6. recce/adapter/sqlmesh_adapter.py +24 -35
  7. recce/apis/check_api.py +39 -28
  8. recce/apis/check_func.py +33 -27
  9. recce/apis/run_api.py +25 -19
  10. recce/apis/run_func.py +29 -23
  11. recce/artifact.py +119 -51
  12. recce/cli.py +1299 -323
  13. recce/config.py +42 -33
  14. recce/connect_to_cloud.py +138 -0
  15. recce/core.py +55 -47
  16. recce/data/404.html +1 -1
  17. recce/data/__next.__PAGE__.txt +10 -0
  18. recce/data/__next._full.txt +23 -0
  19. recce/data/__next._head.txt +8 -0
  20. recce/data/__next._index.txt +8 -0
  21. recce/data/__next._tree.txt +5 -0
  22. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_buildManifest.js +11 -0
  23. recce/data/_next/static/52aV_JrNUZU6dMFgvTQEO/_clientMiddlewareManifest.json +1 -0
  24. recce/data/_next/static/chunks/02b996c7f6a29a06.js +4 -0
  25. recce/data/_next/static/chunks/19c10d219a6a21ff.js +1 -0
  26. recce/data/_next/static/chunks/2df9ec28a061971d.js +11 -0
  27. recce/data/_next/static/chunks/3098c987393bda15.js +1 -0
  28. recce/data/_next/static/chunks/393dc43e483f717a.css +2 -0
  29. recce/data/_next/static/chunks/399e8d91a7e45073.js +2 -0
  30. recce/data/_next/static/chunks/4d0186f631230245.js +1 -0
  31. recce/data/_next/static/chunks/5794ba9e10a9c060.js +11 -0
  32. recce/data/_next/static/chunks/715761c929a3f28b.js +110 -0
  33. recce/data/_next/static/chunks/71f88fcc615bf282.js +1 -0
  34. recce/data/_next/static/chunks/80d2a95eaf1201ea.js +1 -0
  35. recce/data/_next/static/chunks/9979c6109bbbee35.js +1 -0
  36. recce/data/_next/static/chunks/99d638224186c118.js +1 -0
  37. recce/data/_next/static/chunks/d003eb36240e92f3.js +1 -0
  38. recce/data/_next/static/chunks/d3167cdfec4fc351.js +1 -0
  39. recce/data/_next/static/chunks/e124bccf574a3361.css +1 -0
  40. recce/data/_next/static/chunks/f40141db1bdb46f0.css +6 -0
  41. recce/data/_next/static/chunks/fcc53a88741a52f9.js +1 -0
  42. recce/data/_next/static/chunks/turbopack-b1920d28cfb1f28d.js +3 -0
  43. recce/data/_next/static/media/favicon.a8d38d84.ico +0 -0
  44. recce/data/_next/static/media/montserrat-cyrillic-800-normal.d80d830d.woff2 +0 -0
  45. recce/data/_next/static/media/montserrat-cyrillic-800-normal.f9d58125.woff +0 -0
  46. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.076c2a93.woff2 +0 -0
  47. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.a4fa76b5.woff +0 -0
  48. recce/data/_next/static/media/montserrat-latin-800-normal.cde454cc.woff2 +0 -0
  49. recce/data/_next/static/media/montserrat-latin-800-normal.d5761935.woff +0 -0
  50. recce/data/_next/static/media/montserrat-latin-ext-800-normal.40ec0659.woff2 +0 -0
  51. recce/data/_next/static/media/montserrat-latin-ext-800-normal.b671449b.woff +0 -0
  52. recce/data/_next/static/media/montserrat-vietnamese-800-normal.9f7b8541.woff +0 -0
  53. recce/data/_next/static/media/montserrat-vietnamese-800-normal.f9eb854e.woff2 +0 -0
  54. recce/data/_next/static/media/reload-image.7aa931c7.svg +4 -0
  55. recce/data/_not-found/__next._full.txt +17 -0
  56. recce/data/_not-found/__next._head.txt +8 -0
  57. recce/data/_not-found/__next._index.txt +8 -0
  58. recce/data/_not-found/__next._not-found.__PAGE__.txt +5 -0
  59. recce/data/_not-found/__next._not-found.txt +4 -0
  60. recce/data/_not-found/__next._tree.txt +3 -0
  61. recce/data/_not-found.html +1 -0
  62. recce/data/_not-found.txt +17 -0
  63. recce/data/auth_callback.html +68 -0
  64. recce/data/imgs/reload-image.svg +4 -0
  65. recce/data/index.html +1 -27
  66. recce/data/index.txt +23 -7
  67. recce/diff.py +6 -12
  68. recce/event/__init__.py +86 -74
  69. recce/event/collector.py +33 -22
  70. recce/event/track.py +49 -27
  71. recce/exceptions.py +1 -1
  72. recce/git.py +7 -7
  73. recce/github.py +57 -53
  74. recce/mcp_server.py +716 -0
  75. recce/models/__init__.py +4 -1
  76. recce/models/check.py +6 -7
  77. recce/models/run.py +1 -0
  78. recce/models/types.py +131 -28
  79. recce/pull_request.py +27 -25
  80. recce/run.py +165 -121
  81. recce/server.py +303 -111
  82. recce/state/__init__.py +31 -0
  83. recce/state/cloud.py +632 -0
  84. recce/state/const.py +26 -0
  85. recce/state/local.py +56 -0
  86. recce/state/state.py +119 -0
  87. recce/state/state_loader.py +174 -0
  88. recce/summary.py +188 -143
  89. recce/tasks/__init__.py +19 -3
  90. recce/tasks/core.py +11 -13
  91. recce/tasks/dataframe.py +82 -18
  92. recce/tasks/histogram.py +69 -34
  93. recce/tasks/lineage.py +2 -2
  94. recce/tasks/profile.py +152 -86
  95. recce/tasks/query.py +139 -87
  96. recce/tasks/rowcount.py +37 -31
  97. recce/tasks/schema.py +18 -15
  98. recce/tasks/top_k.py +35 -35
  99. recce/tasks/valuediff.py +216 -152
  100. recce/util/__init__.py +3 -0
  101. recce/util/api_token.py +80 -0
  102. recce/util/breaking.py +87 -85
  103. recce/util/cll.py +274 -219
  104. recce/util/io.py +22 -17
  105. recce/util/lineage.py +65 -16
  106. recce/util/logger.py +1 -1
  107. recce/util/onboarding_state.py +45 -0
  108. recce/util/perf_tracking.py +85 -0
  109. recce/util/recce_cloud.py +322 -72
  110. recce/util/singleton.py +4 -4
  111. recce/yaml/__init__.py +7 -10
  112. recce_cloud/__init__.py +24 -0
  113. recce_cloud/api/__init__.py +17 -0
  114. recce_cloud/api/base.py +111 -0
  115. recce_cloud/api/client.py +150 -0
  116. recce_cloud/api/exceptions.py +26 -0
  117. recce_cloud/api/factory.py +63 -0
  118. recce_cloud/api/github.py +76 -0
  119. recce_cloud/api/gitlab.py +82 -0
  120. recce_cloud/artifact.py +57 -0
  121. recce_cloud/ci_providers/__init__.py +9 -0
  122. recce_cloud/ci_providers/base.py +82 -0
  123. recce_cloud/ci_providers/detector.py +147 -0
  124. recce_cloud/ci_providers/github_actions.py +136 -0
  125. recce_cloud/ci_providers/gitlab_ci.py +130 -0
  126. recce_cloud/cli.py +245 -0
  127. recce_cloud/upload.py +214 -0
  128. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/METADATA +68 -37
  129. recce_nightly-1.26.0.20251124.dist-info/RECORD +180 -0
  130. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/WHEEL +1 -1
  131. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/top_level.txt +1 -0
  132. tests/adapter/dbt_adapter/conftest.py +9 -5
  133. tests/adapter/dbt_adapter/dbt_test_helper.py +37 -22
  134. tests/adapter/dbt_adapter/test_dbt_adapter.py +0 -15
  135. tests/adapter/dbt_adapter/test_dbt_cll.py +656 -41
  136. tests/adapter/dbt_adapter/test_selector.py +22 -21
  137. tests/recce_cloud/__init__.py +0 -0
  138. tests/recce_cloud/test_ci_providers.py +351 -0
  139. tests/recce_cloud/test_cli.py +372 -0
  140. tests/recce_cloud/test_client.py +273 -0
  141. tests/recce_cloud/test_platform_clients.py +333 -0
  142. tests/tasks/conftest.py +1 -1
  143. tests/tasks/test_histogram.py +58 -66
  144. tests/tasks/test_lineage.py +36 -23
  145. tests/tasks/test_preset_checks.py +45 -31
  146. tests/tasks/test_profile.py +339 -15
  147. tests/tasks/test_query.py +46 -46
  148. tests/tasks/test_row_count.py +65 -46
  149. tests/tasks/test_schema.py +65 -42
  150. tests/tasks/test_top_k.py +22 -18
  151. tests/tasks/test_valuediff.py +43 -32
  152. tests/test_cli.py +174 -60
  153. tests/test_cli_mcp_optional.py +45 -0
  154. tests/test_cloud_listing_cli.py +324 -0
  155. tests/test_config.py +7 -9
  156. tests/test_connect_to_cloud.py +82 -0
  157. tests/test_core.py +151 -4
  158. tests/test_dbt.py +7 -7
  159. tests/test_mcp_server.py +332 -0
  160. tests/test_pull_request.py +1 -1
  161. tests/test_server.py +25 -19
  162. tests/test_summary.py +29 -17
  163. recce/data/_next/static/Kcbs3GEIyH2LxgLYat0es/_buildManifest.js +0 -1
  164. recce/data/_next/static/chunks/1f229bf6-d9fe92e56db8d93b.js +0 -1
  165. recce/data/_next/static/chunks/29e3cc0d-8c150e37dff9631b.js +0 -1
  166. recce/data/_next/static/chunks/368-7587b306577df275.js +0 -65
  167. recce/data/_next/static/chunks/36e1c10d-bb0210cbd6573a8d.js +0 -1
  168. recce/data/_next/static/chunks/3998a672-eaad84bdd88cc73e.js +0 -1
  169. recce/data/_next/static/chunks/3a92ee20-3b5d922d4157af5e.js +0 -1
  170. recce/data/_next/static/chunks/450c323b-1bb5db526e54435a.js +0 -1
  171. recce/data/_next/static/chunks/47d8844f-79a1b53c66a7d7ec.js +0 -1
  172. recce/data/_next/static/chunks/6dc81886-c94b9b91bc2c3caf.js +0 -1
  173. recce/data/_next/static/chunks/6ef81909-694dc38134099299.js +0 -1
  174. recce/data/_next/static/chunks/700-3b65fc3666820d00.js +0 -2
  175. recce/data/_next/static/chunks/7a8a3e83-d7fa409d97b38b2b.js +0 -1
  176. recce/data/_next/static/chunks/7f27ae6c-413f6b869a04183a.js +0 -1
  177. recce/data/_next/static/chunks/8d700b6a-f0b1f6b9e0d97ce2.js +0 -1
  178. recce/data/_next/static/chunks/9746af58-d74bef4d03eea6ab.js +0 -1
  179. recce/data/_next/static/chunks/a30376cd-7d806e1602f2dc3a.js +0 -1
  180. recce/data/_next/static/chunks/app/_not-found/page-8a886fa0855c3105.js +0 -1
  181. recce/data/_next/static/chunks/app/layout-9102e22cb73f74d6.js +0 -1
  182. recce/data/_next/static/chunks/app/page-cee661090afbd6aa.js +0 -1
  183. recce/data/_next/static/chunks/b63b1b3f-7395c74e11a14e95.js +0 -1
  184. recce/data/_next/static/chunks/c132bf7d-8102037f9ccf372a.js +0 -1
  185. recce/data/_next/static/chunks/c1ceaa8b-a1e442154d23515e.js +0 -1
  186. recce/data/_next/static/chunks/cd9f8d63-cf0d5a7b0f7a92e8.js +0 -54
  187. recce/data/_next/static/chunks/ce84277d-f42c2c58049cea2d.js +0 -1
  188. recce/data/_next/static/chunks/e24bf851-0f8cbc99656833e7.js +0 -1
  189. recce/data/_next/static/chunks/fee69bc6-f17d36c080742e74.js +0 -1
  190. recce/data/_next/static/chunks/framework-ded83d71b51ce901.js +0 -1
  191. recce/data/_next/static/chunks/main-a0859f1f36d0aa6c.js +0 -1
  192. recce/data/_next/static/chunks/main-app-0225a2255968e566.js +0 -1
  193. recce/data/_next/static/chunks/pages/_app-d5672bf3d8b6371b.js +0 -1
  194. recce/data/_next/static/chunks/pages/_error-ed75be3f25588548.js +0 -1
  195. recce/data/_next/static/chunks/webpack-567d72f0bc0820d5.js +0 -1
  196. recce/data/_next/static/css/c9ecb46a4b21c126.css +0 -14
  197. recce/data/_next/static/media/montserrat-cyrillic-800-normal.22628180.woff2 +0 -0
  198. recce/data/_next/static/media/montserrat-cyrillic-800-normal.31d693bb.woff +0 -0
  199. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.7e2c1e62.woff +0 -0
  200. recce/data/_next/static/media/montserrat-cyrillic-ext-800-normal.94a63aea.woff2 +0 -0
  201. recce/data/_next/static/media/montserrat-latin-800-normal.6f8fa298.woff2 +0 -0
  202. recce/data/_next/static/media/montserrat-latin-800-normal.97e20d5e.woff +0 -0
  203. recce/data/_next/static/media/montserrat-latin-ext-800-normal.013b84f9.woff2 +0 -0
  204. recce/data/_next/static/media/montserrat-latin-ext-800-normal.aff52ab0.woff +0 -0
  205. recce/data/_next/static/media/montserrat-vietnamese-800-normal.5f21869b.woff +0 -0
  206. recce/data/_next/static/media/montserrat-vietnamese-800-normal.c0035377.woff2 +0 -0
  207. recce/state.py +0 -753
  208. recce_nightly-1.2.0.20250506.dist-info/RECORD +0 -142
  209. tests/test_state.py +0 -123
  210. /recce/data/_next/static/{Kcbs3GEIyH2LxgLYat0es → 52aV_JrNUZU6dMFgvTQEO}/_ssgManifest.js +0 -0
  211. /recce/data/_next/static/chunks/{polyfills-42372ed130431b0a.js → a6dad97d9634a72d.js} +0 -0
  212. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/entry_points.txt +0 -0
  213. {recce_nightly-1.2.0.20250506.dist-info → recce_nightly-1.26.0.20251124.dist-info}/licenses/LICENSE +0 -0
recce/tasks/core.py CHANGED
@@ -1,11 +1,11 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import List, Union, Optional, Literal
2
+ from typing import List, Literal, Optional, Union
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
6
  from recce.core import default_context
7
7
  from recce.exceptions import RecceCancelException
8
- from recce.models import Run, Check
8
+ from recce.models import Check, Run
9
9
  from recce.util.pydantic_model import pydantic_model_dump
10
10
 
11
11
 
@@ -63,6 +63,7 @@ class TaskResultDiffer(ABC):
63
63
  @staticmethod
64
64
  def diff(base, current):
65
65
  from deepdiff import DeepDiff
66
+
66
67
  diff = DeepDiff(base, current, ignore_order=True)
67
68
  return diff if diff else None
68
69
 
@@ -76,15 +77,12 @@ class TaskResultDiffer(ABC):
76
77
  select: Optional[str] = None,
77
78
  exclude: Optional[str] = None,
78
79
  packages: Optional[list[str]] = None,
79
- view_mode: Optional[Literal['all', 'changed_models']] = None,
80
+ view_mode: Optional[Literal["all", "changed_models"]] = None,
80
81
  ) -> List[str]:
81
82
  nodes = default_context().adapter.select_nodes(
82
- select=select,
83
- exclude=exclude,
84
- packages=packages,
85
- view_mode=view_mode
83
+ select=select, exclude=exclude, packages=packages, view_mode=view_mode
86
84
  )
87
- return [node for node in nodes if not node.startswith('test.')]
85
+ return [node for node in nodes if not node.startswith("test.")]
88
86
 
89
87
  @abstractmethod
90
88
  def _check_result_changed_fn(self, result):
@@ -100,10 +98,10 @@ class TaskResultDiffer(ABC):
100
98
  Should be implemented by subclass.
101
99
  """
102
100
  params = self.run.params
103
- if params.get('model'):
104
- return [TaskResultDiffer.get_node_id_by_name(params.get('model'))]
105
- elif params.get('node_names'):
106
- names = params.get('node_names', [])
101
+ if params.get("model"):
102
+ return [TaskResultDiffer.get_node_id_by_name(params.get("model"))]
103
+ elif params.get("node_names"):
104
+ names = params.get("node_names", [])
107
105
  return [TaskResultDiffer.get_node_id_by_name(name) for name in names]
108
106
  else:
109
107
  # No related node ids in the params
@@ -125,7 +123,7 @@ class CheckValidator:
125
123
  try:
126
124
  check = Check(**check)
127
125
  except Exception as e:
128
- raise ValueError(f'Invalid check format. {str(e)}')
126
+ raise ValueError(f"Invalid check format. {str(e)}")
129
127
 
130
128
  self.validate_check(check)
131
129
 
recce/tasks/dataframe.py CHANGED
@@ -10,20 +10,43 @@ from pydantic import BaseModel, Field
10
10
 
11
11
 
12
12
  class DataFrameColumnType(Enum):
13
- NUMBER = 'number'
14
- INTEGER = 'integer'
15
- TEXT = 'text'
16
- BOOLEAN = 'boolean'
17
- DATE = 'date'
18
- DATETIME = 'datetime'
19
- TIMEDELTA = 'timedelta'
20
- UNKNOWN = 'unknown'
13
+ NUMBER = "number"
14
+ INTEGER = "integer"
15
+ TEXT = "text"
16
+ BOOLEAN = "boolean"
17
+ DATE = "date"
18
+ DATETIME = "datetime"
19
+ TIMEDELTA = "timedelta"
20
+ UNKNOWN = "unknown"
21
+
22
+ @classmethod
23
+ def from_string(cls, type_str: str) -> "DataFrameColumnType":
24
+ """Convert string to DataFrameColumnType enum.
25
+
26
+ Args:
27
+ type_str: String representation of the type (e.g., "integer", "text")
28
+
29
+ Returns:
30
+ DataFrameColumnType enum value
31
+ """
32
+ type_str = type_str.lower().strip()
33
+ try:
34
+ return cls(type_str)
35
+ except ValueError:
36
+ return cls.UNKNOWN
21
37
 
22
38
 
23
39
  class DataFrameColumn(BaseModel):
40
+ key: t.Optional[str] = None
24
41
  name: str
25
42
  type: DataFrameColumnType
26
43
 
44
+ def __init__(self, **data):
45
+ """Initialize DataFrameColumn, auto-setting key=name if key is missing."""
46
+ if "key" not in data or data["key"] is None:
47
+ data["key"] = data.get("name")
48
+ super().__init__(**data)
49
+
27
50
 
28
51
  class DataFrame(BaseModel):
29
52
  columns: t.List[DataFrameColumn]
@@ -32,19 +55,21 @@ class DataFrame(BaseModel):
32
55
  more: t.Optional[bool] = Field(None, description="Whether there are more rows to fetch")
33
56
 
34
57
  @staticmethod
35
- def from_agate(table: 'agate.Table', limit: t.Optional[int] = None, more: t.Optional[bool] = None):
58
+ def from_agate(table: "agate.Table", limit: t.Optional[int] = None, more: t.Optional[bool] = None):
36
59
  from recce.adapter.dbt_adapter import dbt_version
37
- if dbt_version < 'v1.8':
60
+
61
+ if dbt_version < "v1.8":
38
62
  import dbt.clients.agate_helper as agate_helper
39
63
  else:
40
64
  import dbt_common.clients.agate_helper as agate_helper
41
65
 
42
66
  import agate
67
+
43
68
  columns = []
44
69
 
45
70
  for col_name, col_type in zip(table.column_names, table.column_types):
46
71
 
47
- has_integer = hasattr(agate_helper, 'Integer')
72
+ has_integer = hasattr(agate_helper, "Integer")
48
73
 
49
74
  if isinstance(col_type, agate.Number):
50
75
  col_type = DataFrameColumnType.NUMBER
@@ -62,7 +87,7 @@ class DataFrame(BaseModel):
62
87
  col_type = DataFrameColumnType.INTEGER
63
88
  else:
64
89
  col_type = DataFrameColumnType.UNKNOWN
65
- columns.append(DataFrameColumn(name=col_name, type=col_type))
90
+ columns.append(DataFrameColumn(key=col_name, name=col_name, type=col_type))
66
91
 
67
92
  def _row_values(row):
68
93
  # If the value is Decimal, check if it's finite. If not, convert it to float(xxx) (GitHub issue #476)
@@ -78,23 +103,23 @@ class DataFrame(BaseModel):
78
103
  return df
79
104
 
80
105
  @staticmethod
81
- def from_pandas(pandas_df: 'pandas.DataFrame', limit: t.Optional[int] = None, more: t.Optional[bool] = None):
106
+ def from_pandas(pandas_df: "pandas.DataFrame", limit: t.Optional[int] = None, more: t.Optional[bool] = None):
82
107
  columns = []
83
108
  for column in pandas_df.columns:
84
109
  dtype = pandas_df[column].dtype
85
- if dtype == 'int64':
110
+ if dtype == "int64":
86
111
  col_type = DataFrameColumnType.INTEGER
87
- elif dtype == 'float64':
112
+ elif dtype == "float64":
88
113
  col_type = DataFrameColumnType.NUMBER
89
- elif dtype == 'object':
114
+ elif dtype == "object":
90
115
  col_type = DataFrameColumnType.TEXT
91
- elif dtype == 'bool':
116
+ elif dtype == "bool":
92
117
  col_type = DataFrameColumnType.BOOLEAN
93
118
  else:
94
119
  col_type = DataFrameColumnType.UNKNOWN
95
120
  columns.append(DataFrameColumn(name=column, type=col_type))
96
121
 
97
- s = pandas_df.to_json(orient='values')
122
+ s = pandas_df.to_json(orient="values")
98
123
  data = json.loads(s)
99
124
 
100
125
  df = DataFrame(
@@ -104,3 +129,42 @@ class DataFrame(BaseModel):
104
129
  more=more,
105
130
  )
106
131
  return df
132
+
133
+ @staticmethod
134
+ def from_data(
135
+ columns: t.Dict[str, str],
136
+ data: t.List[tuple],
137
+ limit: t.Optional[int] = None,
138
+ more: t.Optional[bool] = None,
139
+ ):
140
+ """Create a DataFrame from columns and data directly.
141
+
142
+ Args:
143
+ columns: Dict defining the schema where keys are column names and values are type strings.
144
+ Type strings can be: "number", "integer", "text", "boolean", "date", "datetime", "timedelta"
145
+ data: List of rows (each row is a list/tuple/sequence of values)
146
+ limit: Optional limit on the number of rows returned
147
+ more: Optional flag indicating whether there are more rows to fetch
148
+
149
+ Returns:
150
+ DataFrame instance
151
+
152
+ Examples:
153
+ # Using simple dict format
154
+ columns = {"idx": "integer", "name": "text", "impacted": "boolean"}
155
+ data = [[0, "model_a", True], [1, "model_b", False]]
156
+ df = DataFrame.from_data(columns, data)
157
+ """
158
+ # Convert dict columns to DataFrameColumn objects
159
+ processed_columns = []
160
+ for key, type_str in columns.items():
161
+ col_type = DataFrameColumnType.from_string(type_str)
162
+ processed_columns.append(DataFrameColumn(key=key, name=key, type=col_type))
163
+
164
+ df = DataFrame(
165
+ columns=processed_columns,
166
+ data=data,
167
+ limit=limit,
168
+ more=more,
169
+ )
170
+ return df
recce/tasks/histogram.py CHANGED
@@ -9,34 +9,66 @@ from pydantic import BaseModel
9
9
  from recce.core import default_context
10
10
  from recce.models import Check
11
11
  from recce.tasks import Task
12
- from recce.tasks.core import TaskResultDiffer, CheckValidator
12
+ from recce.tasks.core import CheckValidator, TaskResultDiffer
13
13
  from recce.tasks.query import QueryMixin
14
14
 
15
15
  sql_datetime_types = [
16
- "DATE", "DATETIME", "TIMESTAMP", "TIME",
16
+ "DATE",
17
+ "DATETIME",
18
+ "TIMESTAMP",
19
+ "TIME",
17
20
  "YEAR", # Specific to MySQL/MariaDB
18
- "DATETIME2", "SMALLDATETIME", "DATETIMEOFFSET", # Specific to SQL Server
21
+ "DATETIME2",
22
+ "SMALLDATETIME",
23
+ "DATETIMEOFFSET", # Specific to SQL Server
19
24
  "INTERVAL", # Common in PostgreSQL and Oracle
20
- "TIMESTAMPTZ", "TIMETZ", # Specific to PostgreSQL
21
- "TIMESTAMP WITH TIME ZONE", "TIMESTAMP WITH LOCAL TIME ZONE", # Oracle
22
- "TIMESTAMP_LTZ", "TIMESTAMP_NTZ", "TIMESTAMP_TZ", # Specific to Snowflake
25
+ "TIMESTAMPTZ",
26
+ "TIMETZ", # Specific to PostgreSQL
27
+ "TIMESTAMP WITH TIME ZONE",
28
+ "TIMESTAMP WITH LOCAL TIME ZONE", # Oracle
29
+ "TIMESTAMP_LTZ",
30
+ "TIMESTAMP_NTZ",
31
+ "TIMESTAMP_TZ", # Specific to Snowflake
23
32
  ]
24
33
 
25
34
  sql_integer_types = [
26
- "TINYINT", "SMALLINT", "MEDIUMINT", "INT", "INTEGER", "BIGINT", # Common across most databases
27
- "INT2", "INT4", "INT8", # PostgreSQL specific aliases
35
+ "TINYINT",
36
+ "SMALLINT",
37
+ "MEDIUMINT",
38
+ "INT",
39
+ "INTEGER",
40
+ "BIGINT", # Common across most databases
41
+ "INT2",
42
+ "INT4",
43
+ "INT8", # PostgreSQL specific aliases
28
44
  "UNSIGNED BIG INT", # SQLite specific
29
45
  "NUMBER", # Oracle, can be used as an integer with precision and scale
30
46
  "NUMERIC", # Generally available in many SQL databases, used with precision and scale
31
- "SMALLSERIAL", "SERIAL", "BIGSERIAL", # PostgreSQL auto-increment types
32
- "IDENTITY", "SMALLIDENTITY", "BIGIDENTITY", # SQL Server specific auto-increment types
47
+ "SMALLSERIAL",
48
+ "SERIAL",
49
+ "BIGSERIAL", # PostgreSQL auto-increment types
50
+ "IDENTITY",
51
+ "SMALLIDENTITY",
52
+ "BIGIDENTITY", # SQL Server specific auto-increment types
33
53
  "BYTEINT", # Specific to Snowflake, for storing very small integers
34
54
  ]
35
55
 
36
56
  sql_not_supported_types = [
37
- "CHAR", "VARCHAR", "TINYTEXT", "TEXT", "MEDIUMTEXT", "LONGTEXT",
38
- "NCHAR", "NVARCHAR", "VARCHAR2", "NVARCHAR2", "CLOB", "NCLOB",
39
- "VARCHAR(MAX)", "XML", "JSON",
57
+ "CHAR",
58
+ "VARCHAR",
59
+ "TINYTEXT",
60
+ "TEXT",
61
+ "MEDIUMTEXT",
62
+ "LONGTEXT",
63
+ "NCHAR",
64
+ "NVARCHAR",
65
+ "VARCHAR2",
66
+ "NVARCHAR2",
67
+ "CLOB",
68
+ "NCLOB",
69
+ "VARCHAR(MAX)",
70
+ "XML",
71
+ "JSON",
40
72
  "BOOLEAN", # PostgreSQL, SQLite, and others with native boolean support
41
73
  "TINYINT(1)", # MySQL/MariaDB uses TINYINT(1) to represent boolean values
42
74
  "BIT", # SQL Server and others use BIT to represent boolean values, where 1 is true and 0 is false
@@ -185,7 +217,7 @@ def query_numeric_histogram(task, node, column, column_type, min_value, max_valu
185
217
  else:
186
218
  counts[num_bins - 1] += count
187
219
  base_result = {
188
- 'counts': counts,
220
+ "counts": counts,
189
221
  }
190
222
  if curr is not None:
191
223
  counts = [0] * num_bins
@@ -199,7 +231,7 @@ def query_numeric_histogram(task, node, column, column_type, min_value, max_valu
199
231
  else:
200
232
  counts[num_bins - 1] += count
201
233
  curr_result = {
202
- 'counts': counts,
234
+ "counts": counts,
203
235
  }
204
236
  return base_result, curr_result, bin_edges, labels
205
237
 
@@ -209,7 +241,7 @@ def query_datetime_histogram(task, node, column, min_value, max_value):
209
241
  print(max_value, min_value, days_delta)
210
242
  # _type = None
211
243
  if days_delta > 365 * 4:
212
- _type = 'yearly'
244
+ _type = "yearly"
213
245
  dmin = date(min_value.year, 1, 1)
214
246
  if max_value.year < 3000:
215
247
  dmax = date(max_value.year, 1, 1) + relativedelta(years=+1)
@@ -237,7 +269,7 @@ def query_datetime_histogram(task, node, column, min_value, max_value):
237
269
  else:
238
270
  dmax = date(3000, 1, 1)
239
271
  period = relativedelta(dmax, dmin)
240
- num_buckets = (period.years * 12 + period.months)
272
+ num_buckets = period.years * 12 + period.months
241
273
  bin_edges = [dmin + relativedelta(months=i) for i in range(num_buckets + 1)]
242
274
  sql = f"""
243
275
  SELECT
@@ -285,18 +317,18 @@ def query_datetime_histogram(task, node, column, min_value, max_value):
285
317
 
286
318
  base_counts = [0] * num_buckets
287
319
  print(_type)
288
- for (d, v) in base.rows:
320
+ for d, v in base.rows:
289
321
  i = bin_edges.index(d.date()) if isinstance(d, datetime) else bin_edges.index(d)
290
322
  base_counts[i] = v
291
323
  curr_counts = [0] * num_buckets
292
- for (d, v) in curr.rows:
324
+ for d, v in curr.rows:
293
325
  i = bin_edges.index(d.date()) if isinstance(d, datetime) else bin_edges.index(d)
294
326
  curr_counts[i] = v
295
327
  base_result = {
296
- 'counts': base_counts,
328
+ "counts": base_counts,
297
329
  }
298
330
  curr_result = {
299
- 'counts': curr_counts,
331
+ "counts": curr_counts,
300
332
  }
301
333
 
302
334
  return base_result, curr_result, bin_edges
@@ -310,6 +342,7 @@ class HistogramDiffTask(Task, QueryMixin):
310
342
 
311
343
  def execute(self):
312
344
  from recce.adapter.dbt_adapter import DbtAdapter
345
+
313
346
  result = {}
314
347
 
315
348
  dbt_adapter: DbtAdapter = default_context().adapter
@@ -353,29 +386,31 @@ class HistogramDiffTask(Task, QueryMixin):
353
386
  labels = None
354
387
  if min_value is None or max_value is None:
355
388
  base_result = {
356
- 'counts': [],
389
+ "counts": [],
357
390
  }
358
391
  current_result = {
359
- 'counts': [],
392
+ "counts": [],
360
393
  }
361
394
  bin_edges = []
362
395
  labels = []
363
396
  elif column_type.upper() in sql_datetime_types:
364
397
  base_result, current_result, bin_edges = query_datetime_histogram(
365
- self, node, column, min_value, max_value)
398
+ self, node, column, min_value, max_value
399
+ )
366
400
  else:
367
401
  base_result, current_result, bin_edges, labels = query_numeric_histogram(
368
- self, node, column, column_type, min_value, max_value, num_bins)
402
+ self, node, column, column_type, min_value, max_value, num_bins
403
+ )
369
404
  if base_result:
370
- base_result['total'] = base_total
405
+ base_result["total"] = base_total
371
406
  if current_result:
372
- current_result['total'] = curr_total
373
- result['base'] = base_result
374
- result['current'] = current_result
375
- result['min'] = min_value
376
- result['max'] = max_value
377
- result['bin_edges'] = bin_edges
378
- result['labels'] = labels
407
+ current_result["total"] = curr_total
408
+ result["base"] = base_result
409
+ result["current"] = current_result
410
+ result["min"] = min_value
411
+ result["max"] = max_value
412
+ result["bin_edges"] = bin_edges
413
+ result["labels"] = labels
379
414
  return result
380
415
 
381
416
  def cancel(self):
@@ -386,7 +421,7 @@ class HistogramDiffTask(Task, QueryMixin):
386
421
 
387
422
  class HistogramDiffTaskResultDiffer(TaskResultDiffer):
388
423
  def _check_result_changed_fn(self, result):
389
- return TaskResultDiffer.diff(result['base'], result['current'])
424
+ return TaskResultDiffer.diff(result["base"], result["current"])
390
425
 
391
426
 
392
427
  class HistogramDiffCheckValidator(CheckValidator):
recce/tasks/lineage.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Optional, Literal
1
+ from typing import Literal, Optional
2
2
 
3
3
  from pydantic import BaseModel
4
4
 
@@ -10,7 +10,7 @@ class LineageDiffParams(BaseModel):
10
10
  select: Optional[str] = None
11
11
  exclude: Optional[str] = None
12
12
  packages: Optional[list[str]] = None
13
- view_mode: Optional[Literal['all', 'changed_models']] = None
13
+ view_mode: Optional[Literal["all", "changed_models"]] = None
14
14
 
15
15
 
16
16
  class LineageDiffCheckValidator(CheckValidator):