databricks-tpcds 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. databricks_tpcds/__init__.py +9 -0
  2. databricks_tpcds/databricks_tpcds.py +153 -0
  3. databricks_tpcds-0.1.0.dist-info/METADATA +101 -0
  4. databricks_tpcds-0.1.0.dist-info/RECORD +116 -0
  5. databricks_tpcds-0.1.0.dist-info/WHEEL +5 -0
  6. databricks_tpcds-0.1.0.dist-info/top_level.txt +2 -0
  7. resources/__init__.py +0 -0
  8. resources/queries/q0.sql +2 -0
  9. resources/queries/q1.sql +24 -0
  10. resources/queries/q10.sql +58 -0
  11. resources/queries/q11.sql +80 -0
  12. resources/queries/q12.sql +33 -0
  13. resources/queries/q13.sql +51 -0
  14. resources/queries/q14.sql +209 -0
  15. resources/queries/q14a.sql +102 -0
  16. resources/queries/q14b.sql +106 -0
  17. resources/queries/q15.sql +19 -0
  18. resources/queries/q16.sql +30 -0
  19. resources/queries/q17.sql +44 -0
  20. resources/queries/q18.sql +33 -0
  21. resources/queries/q19.sql +24 -0
  22. resources/queries/q2.sql +59 -0
  23. resources/queries/q20.sql +29 -0
  24. resources/queries/q21.sql +29 -0
  25. resources/queries/q22.sql +19 -0
  26. resources/queries/q23.sql +106 -0
  27. resources/queries/q23a.sql +49 -0
  28. resources/queries/q23b.sql +56 -0
  29. resources/queries/q24.sql +105 -0
  30. resources/queries/q24a.sql +51 -0
  31. resources/queries/q24b.sql +53 -0
  32. resources/queries/q25.sql +47 -0
  33. resources/queries/q26.sql +20 -0
  34. resources/queries/q27.sql +22 -0
  35. resources/queries/q28.sql +52 -0
  36. resources/queries/q29.sql +46 -0
  37. resources/queries/q3.sql +20 -0
  38. resources/queries/q30.sql +30 -0
  39. resources/queries/q31.sql +51 -0
  40. resources/queries/q32.sql +27 -0
  41. resources/queries/q33.sql +74 -0
  42. resources/queries/q34.sql +30 -0
  43. resources/queries/q35.sql +57 -0
  44. resources/queries/q36.sql +29 -0
  45. resources/queries/q37.sql +16 -0
  46. resources/queries/q38.sql +22 -0
  47. resources/queries/q39.sql +53 -0
  48. resources/queries/q39a.sql +25 -0
  49. resources/queries/q39b.sql +27 -0
  50. resources/queries/q4.sql +115 -0
  51. resources/queries/q40.sql +27 -0
  52. resources/queries/q41.sql +51 -0
  53. resources/queries/q42.sql +21 -0
  54. resources/queries/q43.sql +18 -0
  55. resources/queries/q44.sql +34 -0
  56. resources/queries/q45.sql +19 -0
  57. resources/queries/q46.sql +34 -0
  58. resources/queries/q47.sql +50 -0
  59. resources/queries/q48.sql +66 -0
  60. resources/queries/q49.sql +128 -0
  61. resources/queries/q5.sql +127 -0
  62. resources/queries/q50.sql +58 -0
  63. resources/queries/q51.sql +44 -0
  64. resources/queries/q52.sql +21 -0
  65. resources/queries/q53.sql +27 -0
  66. resources/queries/q54.sql +55 -0
  67. resources/queries/q55.sql +13 -0
  68. resources/queries/q56.sql +68 -0
  69. resources/queries/q57.sql +47 -0
  70. resources/queries/q58.sql +64 -0
  71. resources/queries/q59.sql +43 -0
  72. resources/queries/q6.sql +25 -0
  73. resources/queries/q60.sql +77 -0
  74. resources/queries/q61.sql +43 -0
  75. resources/queries/q62.sql +34 -0
  76. resources/queries/q63.sql +28 -0
  77. resources/queries/q64.sql +119 -0
  78. resources/queries/q65.sql +28 -0
  79. resources/queries/q66.sql +219 -0
  80. resources/queries/q67.sql +43 -0
  81. resources/queries/q68.sql +41 -0
  82. resources/queries/q69.sql +46 -0
  83. resources/queries/q7.sql +20 -0
  84. resources/queries/q70.sql +37 -0
  85. resources/queries/q71.sql +39 -0
  86. resources/queries/q72.sql +28 -0
  87. resources/queries/q73.sql +27 -0
  88. resources/queries/q74.sql +60 -0
  89. resources/queries/q75.sql +69 -0
  90. resources/queries/q76.sql +23 -0
  91. resources/queries/q77.sql +107 -0
  92. resources/queries/q78.sql +57 -0
  93. resources/queries/q79.sql +22 -0
  94. resources/queries/q8.sql +107 -0
  95. resources/queries/q80.sql +95 -0
  96. resources/queries/q81.sql +30 -0
  97. resources/queries/q82.sql +16 -0
  98. resources/queries/q83.sql +66 -0
  99. resources/queries/q84.sql +20 -0
  100. resources/queries/q85.sql +83 -0
  101. resources/queries/q86.sql +25 -0
  102. resources/queries/q87.sql +22 -0
  103. resources/queries/q88.sql +93 -0
  104. resources/queries/q89.sql +27 -0
  105. resources/queries/q9.sql +50 -0
  106. resources/queries/q90.sql +21 -0
  107. resources/queries/q91.sql +30 -0
  108. resources/queries/q92.sql +29 -0
  109. resources/queries/q93.sql +17 -0
  110. resources/queries/q94.sql +28 -0
  111. resources/queries/q95.sql +31 -0
  112. resources/queries/q96.sql +15 -0
  113. resources/queries/q97.sql +24 -0
  114. resources/queries/q98.sql +32 -0
  115. resources/queries/q99.sql +34 -0
  116. resources/table_names.py +26 -0
@@ -0,0 +1,9 @@
1
+ import os
2
+ import sys
3
+
4
+ # Ensure that the package can find resources
5
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
6
+
7
+ # Expose modules so they can be imported directly
8
+ from resources import table_names
9
+ from .databricks_tpcds import DatabricksTPCDS
@@ -0,0 +1,153 @@
1
+ import time
2
+ from resources import table_names
3
+ import importlib.resources as pkg_resources
4
+
5
+
6
class DatabricksTPCDS:
    """
    Run the TPC-DS benchmark queries against a Databricks catalog/schema.

    Attributes
    ----------
    spark : pyspark.sql.session.SparkSession
        The SparkSession used to interact with Databricks.
    schema_name : str
        The schema that tables are created in and queries are run against.
    catalog_name : str or None
        The catalog to use. When falsy, no ``USE CATALOG`` / ``CREATE CATALOG``
        statement is issued and the session's current catalog is used.
    time_taken_by_query : dict
        Unrounded elapsed time in milliseconds of the latest run of each
        query, keyed by the query identifier passed to ``run_query``.

    Methods
    -------
    use_catalog() -> None:
        Switches the session to ``catalog_name`` (no-op when it is not set).
    use_schema() -> None:
        Switches the session to ``schema_name``.
    create_catalog() -> None:
        Creates the catalog if it does not exist (no-op when it is not set).
    create_schema() -> None:
        Creates the schema if it does not exist.
    create_table(bucket_name, prefix, table_name, table_format="delta") -> None:
        Registers one table located under ``s3://bucket_name/prefix/``.
    create_tables(bucket_name, prefix, table_names, table_format="delta") -> None:
        Registers several tables.
    create_all_tables(bucket_name, prefix, table_format="delta") -> None:
        Registers every table listed in ``table_names.TABLE_NAMES``.
    warm_up() -> None:
        Reads 100 ordered rows from every table.
    run_query(query_num) -> int:
        Runs one packaged query and returns its elapsed time in milliseconds.
    run_queries(query_nums) -> dict:
        Runs the given queries and returns their elapsed times.
    run_all_queries(should_warmup=False) -> dict:
        Runs queries 1-99 and returns their elapsed times.
    """

    # Column used by warm_up() to order the rows it reads from each table.
    order_by_cols: dict[str, str] = {
        "call_center": "cc_call_center_id",
        "catalog_page": "cp_catalog_page_id",
        "catalog_returns": "cr_returned_date_sk",
        "catalog_sales": "cs_sold_date_sk",
        "customer": "c_customer_id",
        "customer_address": "ca_address_id",
        "customer_demographics": "cd_demo_sk",
        "date_dim": "d_date_id",
        "household_demographics": "hd_demo_sk",
        "income_band": "ib_income_band_sk",
        "inventory": "inv_item_sk",
        "item": "i_item_id",
        "promotion": "p_promo_id",
        "reason": "r_reason_id",
        "ship_mode": "sm_ship_mode_id",
        "store": "s_store_id",
        "store_returns": "sr_returned_date_sk",
        "store_sales": "ss_sold_date_sk",
        "time_dim": "t_time_id",
        "warehouse": "w_warehouse_id",
        "web_page": "wp_web_page_id",
        "web_returns": "wr_returned_date_sk",
        "web_sales": "ws_sold_date_sk",
        "web_site": "web_site_id",
    }

    # Query numbers that run_all_queries() executes as "<n>a" and "<n>b"
    # variants instead of the single "q<n>.sql" file.
    _SPLIT_QUERY_NUMS = frozenset({14, 23, 24, 39})
    _SPLIT_QUERY_SUFFIXES = ("a", "b")

    def __init__(self, spark, schema_name, catalog_name=None):
        """
        Store the session and target names, and report the disk-cache setting.

        Parameters
        ----------
        spark : pyspark.sql.session.SparkSession
            Session used for every statement issued by this object.
        schema_name : str
            Schema to create tables in and run queries against.
        catalog_name : str, optional
            Catalog to use; when omitted the catalog statements are skipped.
        """
        self.spark = spark
        self.catalog_name = catalog_name
        self.schema_name = schema_name
        self.time_taken_by_query = {}
        # NOTE: no default is passed on purpose, so the value reported is the
        # one Spark resolves for this key rather than a caller-supplied one.
        print(f"Disk cache enabled: {self.spark.conf.get('spark.databricks.io.cache.enabled')}")

    def use_catalog(self) -> None:
        """Issue ``USE CATALOG`` for ``catalog_name``; do nothing when it is falsy."""
        if self.catalog_name:
            self.spark.sql(f"USE CATALOG {self.catalog_name}")

    def use_schema(self) -> None:
        """Issue ``USE SCHEMA`` for ``schema_name``."""
        self.spark.sql(f"USE SCHEMA {self.schema_name}")

    def create_catalog(self) -> None:
        """Create ``catalog_name`` if it does not exist; do nothing when it is falsy."""
        if self.catalog_name:
            self.spark.sql(f"CREATE CATALOG IF NOT EXISTS {self.catalog_name}")

    def create_schema(self) -> None:
        """Switch to the configured catalog (if any) and create ``schema_name`` if missing."""
        self.use_catalog()
        self.spark.sql(f"CREATE SCHEMA IF NOT EXISTS {self.schema_name}")

    def create_table(self, bucket_name: str, prefix: str, table_name: str, table_format: str = "delta") -> None:
        """
        Register ``table_name`` over the data at ``s3://bucket_name/prefix/table_name``.

        The statement uses ``IF NOT EXISTS``, so the confirmation message is
        printed whether or not the table already existed.
        """
        table_path = f"s3://{bucket_name}/{prefix}/{table_name}"
        self.use_catalog()
        self.use_schema()
        self.spark.sql(f"CREATE TABLE IF NOT EXISTS {table_name} USING {table_format} LOCATION '{table_path}'")
        print(f"Table {table_name} created successfully using {table_format} format.")

    def create_tables(self, bucket_name: str, prefix: str, table_names: list[str], table_format: str = "delta") -> None:
        """Call ``create_table`` for each name in ``table_names``, in order."""
        for table in table_names:
            self.create_table(bucket_name, prefix, table, table_format)

    def create_all_tables(self, bucket_name: str, prefix: str, table_format: str = "delta") -> None:
        """Call ``create_tables`` with every name in ``table_names.TABLE_NAMES``."""
        self.create_tables(bucket_name, prefix, table_names.TABLE_NAMES, table_format)

    def warm_up(self) -> None:
        """
        Read 100 rows, ordered by the table's ``order_by_cols`` column, from every table.

        Raises
        ------
        KeyError
            If a name in ``table_names.TABLE_NAMES`` has no entry in ``order_by_cols``.
        """
        self.use_catalog()
        self.use_schema()
        for table in table_names.TABLE_NAMES:
            order_by_col = self.order_by_cols[table]
            self.spark.sql(f"SELECT * FROM {table} ORDER BY {order_by_col} LIMIT 100").collect()

    def run_query(self, query_num: str) -> int:
        """
        Run the packaged query ``q<query_num>.sql`` and time its execution.

        Parameters
        ----------
        query_num : str
            Query identifier without the ``q`` prefix, e.g. ``"7"`` or ``"14a"``.

        Returns
        -------
        int
            Elapsed time in milliseconds, rounded to the nearest integer, or
            ``-1`` when the query file is not shipped in the package. The
            unrounded value is stored in ``time_taken_by_query[query_num]``.
        """
        self.use_catalog()
        self.use_schema()
        query_filename = f"q{query_num}.sql"

        # Guard only the resource lookup: an error raised while Spark executes
        # the query must surface instead of being reported as a missing file.
        try:
            with pkg_resources.open_text("resources.queries", query_filename) as query:
                query_string = query.read()
        except FileNotFoundError:
            print(f"Query file {query_filename} not found in package.")
            return -1

        query_desc = f"q{query_num}"
        print(query_desc)
        # Tag the jobs before starting the clock so only execution is timed.
        self.spark.sparkContext.setJobGroup(query_desc, query_desc, interruptOnCancel=True)

        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        self.spark.sql(query_string).collect()
        time_taken = (time.perf_counter() - start_time) * 1000

        self.time_taken_by_query[query_num] = time_taken
        return round(time_taken)

    def run_queries(self, query_nums: list[int]) -> dict[int, int]:
        """
        Run each query in ``query_nums`` and return ``{query_num: elapsed_ms}``.

        Each entry is converted with ``str()`` before being passed to
        ``run_query``; the returned dict is keyed by the original entries.
        """
        return {query_num: self.run_query(str(query_num)) for query_num in query_nums}

    def run_all_queries(self, should_warmup=False) -> "dict[int | str, int]":
        """
        Run queries 1 through 99 and return their elapsed times in milliseconds.

        Queries 14, 23, 24 and 39 are run as their ``a`` and ``b`` variants and
        are keyed by strings such as ``"14a"``; every other query is keyed by
        its integer number.

        Parameters
        ----------
        should_warmup : bool, optional
            When true, ``warm_up`` is called before the first query.
        """
        if should_warmup:
            self.warm_up()
        time_taken_by_queries = {}
        for query_num in range(1, 100):
            if query_num in self._SPLIT_QUERY_NUMS:
                for suffix in self._SPLIT_QUERY_SUFFIXES:
                    query_id = f"{query_num}{suffix}"
                    time_taken_by_queries[query_id] = self.run_query(query_id)
            else:
                time_taken_by_queries[query_num] = self.run_query(str(query_num))
        return time_taken_by_queries
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: databricks-tpcds
3
+ Version: 0.1.0
4
+ Summary: Run the TPC-DS benchmark on Databricks (Delta Lake).
5
+ Home-page: https://github.com/onehouseinc/onebench
6
+ Author: Onehouse
7
+ License: Apache-2.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.9
12
+ Description-Content-Type: text/markdown
13
+ Dynamic: author
14
+ Dynamic: classifier
15
+ Dynamic: description
16
+ Dynamic: description-content-type
17
+ Dynamic: home-page
18
+ Dynamic: license
19
+ Dynamic: requires-python
20
+ Dynamic: summary
21
+
22
+ ## Running TPCDS on Databricks
23
+ This document describes how to run TPC-DS on Databricks. TPC-DS is a decision support benchmark that models several generally applicable aspects of a decision support system, including queries and data maintenance. It provides a representative evaluation of performance as a general-purpose decision support system. The benchmark is defined and published by the Transaction Processing Performance Council (TPC); the "DS" in its name stands for Decision Support.
24
+
25
+ ### Pre-requisites
26
+ 1. A Databricks workspace
27
+ 2. A Databricks metastore configured for the workspace
28
+ 3. A Databricks cluster (jobs compute, all-purpose compute, etc.)
29
+
30
+ ## Install from PyPI
31
+ Install the package directly in a Databricks notebook:
32
+ ```shell
33
+ %pip install databricks-tpcds
34
+ ```
35
+
36
+ The package provides the `DatabricksTPCDS` library. You drive it from an entrypoint script like
37
+ the Delta Lake example below.
38
+
39
+ ## Delta Lake entrypoint example
40
+ Fill in the placeholder `catalog_name`, `bucket_name`, `prefix`, and `schema_name` with your own
41
+ values, then run it on your Databricks cluster.
42
+
43
+ ```python
44
+ from pyspark.sql import SparkSession
45
+ from databricks_tpcds.databricks_tpcds import DatabricksTPCDS
46
+
47
+
48
+ def main():
49
+ catalog_name = 'my_catalog'
50
+ bucket_name = 'my-bucket'
51
+ prefix = 'path/to/tpcds-datasets/1TB'
52
+ schema_name = 'my_schema'
53
+
54
+ # Initialize Spark session
55
+ spark = SparkSession.builder.appName("TPCDS Query Runner").getOrCreate()
56
+
57
+ # Enable or disable the Databricks disk cache ("true" / "false")
58
+ spark.conf.set("spark.databricks.io.cache.enabled", "false")
59
+
60
+ databricks_tpcds = DatabricksTPCDS(spark, schema_name=schema_name, catalog_name=catalog_name)
61
+
62
+ # Create catalog
63
+ databricks_tpcds.create_catalog()
64
+
65
+ # Create schema
66
+ databricks_tpcds.create_schema()
67
+
68
+ # Create a single table, provide the table name
69
+ # databricks_tpcds.create_table(bucket_name, prefix, "call_center")
70
+
71
+ # Create multiple tables, provide the list of table names
72
+ # databricks_tpcds.create_tables(bucket_name, prefix, ["call_center", "catalog_page"])
73
+
74
+ # Create all tables: provide the bucket name and prefix, and every TPC-DS table is created
75
+ databricks_tpcds.create_all_tables(bucket_name, prefix)
76
+
77
+ # Run all queries
78
+ for i in range(3):
79
+ time_taken_by_queries = databricks_tpcds.run_all_queries(should_warmup=False)
80
+ print("QUERY_NUMBER,TIME_TAKEN")
81
+ for query_no, time_taken in time_taken_by_queries.items():
82
+ print(f"{query_no},{time_taken}")
83
+
84
+
85
+ if __name__ == "__main__":
86
+ main()
87
+ ```
88
+
89
+ ## Developing locally
90
+ 1. Modify the code if necessary in `src/databricks_tpcds/databricks_tpcds.py`
91
+ 2. Review or modify the queries in `src/resources/queries/`
92
+ 3. Build the package:
93
+ ```shell
94
+ cd tpcds/databricks
95
+ python3.10 -m build
96
+ ```
97
+ 4. Upload the built `.whl` to your Databricks workspace and install it in a notebook:
98
+ ```shell
99
+ %pip install path/to/databricks_tpcds-0.1.0-py3-none-any.whl --force-reinstall
100
+ ```
101
+ 5. Run the benchmark using the Delta Lake entrypoint example above.
@@ -0,0 +1,116 @@
1
+ databricks_tpcds/__init__.py,sha256=D3UhqQMa2Zo1O18AJBfv0kZL_7Updi9zINGH7-ODTy0,258
2
+ databricks_tpcds/databricks_tpcds.py,sha256=L1C8UmYrMTYkXKk70Dw-di-K_kgp_S5aEmFnUfeeAP8,6415
3
+ resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ resources/table_names.py,sha256=x8g3Hu7CqrWK1mbcNEUCwpFHLcLlEjpwgK9380R7cjs,466
5
+ resources/queries/q0.sql,sha256=1kSAiQgPOTSwisGkxqNmF4LLwbXIi6G7g-zLytgny38,47
6
+ resources/queries/q1.sql,sha256=PTYMAqFW1-Q7eE8QWq5XqMk9EPseKXYJK5KPxesDprY,626
7
+ resources/queries/q10.sql,sha256=OZ9ouFNSaTAX_XTxkb6yZkGpZRJKvvqhHllgI-UYT0k,1754
8
+ resources/queries/q11.sql,sha256=JwR4vAl2cJDUgT81GQr67uOzGsc8M5CFiBm_plYSAdg,2830
9
+ resources/queries/q12.sql,sha256=StYz8D6HE89N29N6jPLz0YGXIRknQSxlQaEWTY9_uFA,750
10
+ resources/queries/q13.sql,sha256=CBDKyBa14NzTywVqw55sA4WwN37qoqBxyB8M1_5coOc,1457
11
+ resources/queries/q14.sql,sha256=66j0TUGmDnPbsUHy5wX5SX-dBqgGjwpYKxKdckPdvCc,7382
12
+ resources/queries/q14a.sql,sha256=6fi3XM8GQVRQUbC8zUUg6OrTsOfxc5N-JwA_bqU3C1A,3511
13
+ resources/queries/q14b.sql,sha256=MZRv8vkkM6Tdx6x1LKL1VnJWXg0eqfgPu-0ljSDmH6k,3869
14
+ resources/queries/q15.sql,sha256=ZnkABvPLLS_fNfvTv8K1e4LU6JNXPk2-osLZTyDtLHU,545
15
+ resources/queries/q16.sql,sha256=ydKhoakonPpRR-8asN_gJVpmYbWOrk42D-b927fY2Ts,973
16
+ resources/queries/q17.sql,sha256=pHu6PBqMPb06PyRI_hdR9-NNq0Dq5WAzrtYDw5Covv0,1637
17
+ resources/queries/q18.sql,sha256=kBCntZr4tf0bKvEGqdmHHC0vctzz8v19B3VcvlsnRYA,1240
18
+ resources/queries/q19.sql,sha256=c9Sz_2l-d9ZABPLX-PAjJruWa94N5W1SB5wyOSBl8Ag,666
19
+ resources/queries/q2.sql,sha256=Es3tZnyYmGwPA1HCq6KV1GAsu4lOumamhpwDtpXF6Lc,2117
20
+ resources/queries/q20.sql,sha256=9Bm2Z1ElEznDqiU3_UAK9E7BuvteKVzk0csa9UqoyoQ,768
21
+ resources/queries/q21.sql,sha256=TgXZrPTdt9H3K0Y25-VUzbjGj5CreEIhFkdWs5g9HWo,1040
22
+ resources/queries/q22.sql,sha256=Y2OA_KcO1yfEgZt2i2WHjyuKRZTY4tVG4Gr30TwldJ8,550
23
+ resources/queries/q23.sql,sha256=PxR4sSEBgiNZAoQDjMwjFG9XUzxYYZSNEk8WjyRS1og,3789
24
+ resources/queries/q23a.sql,sha256=CEsE_NpeFh4Xrs_mJrWKBrAIAdNMFh3kYmQl19Bzb8E,1694
25
+ resources/queries/q23b.sql,sha256=-tBHJ4MjF2xjbXox2IRBgCe5joErmlDnU7eM1yy9joA,2054
26
+ resources/queries/q24.sql,sha256=Bghu-03GNLBnhsFdfdk0Uf9e_kD4lzfJMe24cO82DXg,2324
27
+ resources/queries/q24a.sql,sha256=KWV2Sg2Bf7gbe_5-_ZJTtVwRQBXd688z-qZaD1JzTtM,1155
28
+ resources/queries/q24b.sql,sha256=-s04D7KhD-wfZEMQATKlJOctLKL2mCqfBiXHg1WcUZk,1155
29
+ resources/queries/q25.sql,sha256=omZEK7nySq7kRMeUCYxCnLVbgl2eq3qYg53yg1Ex2MQ,1005
30
+ resources/queries/q26.sql,sha256=3o6gqMkMAHgcmekM3VcHTpWR0TNbrko01Rv5kyIGppk,629
31
+ resources/queries/q27.sql,sha256=F08oFBAuyY5vnuC7btOV5kHVXpYovmtDWxpjriJL_qs,690
32
+ resources/queries/q28.sql,sha256=vAQCnMnHg95sZOYU_jAWo-G6KVlnXvfSbHw2ptcRmsE,2178
33
+ resources/queries/q29.sql,sha256=T0NIgYAsHVPbySyjnxUjcAgP_ZQaMwyb6Li3VpzdrMM,1251
34
+ resources/queries/q3.sql,sha256=oLf7g392MnqkhO_JScL9J2l41eg3vYV74fmS1tlc0J4,464
35
+ resources/queries/q30.sql,sha256=Buvtr4UUz_GAcYkYKLB5lk7UqF2B4c8hUJQKE0AzHnk,1205
36
+ resources/queries/q31.sql,sha256=NMYKUubYRSZT52pGtaDV8UUNaX_Jx8ijk9h-71kVM7g,1695
37
+ resources/queries/q32.sql,sha256=pbi-B4Rkfp53Bp4z3LgDQFvqdujjaAQHvFba_R-r3Jo,688
38
+ resources/queries/q33.sql,sha256=1BaP8vu92vJb9TaemzK-UNxroo3zssaQHn3Cp65Dl20,1867
39
+ resources/queries/q34.sql,sha256=BAfGjRZEGCl3UpxVYT86dqEzCKHTN5wTPWiPZ2x4lzI,1385
40
+ resources/queries/q35.sql,sha256=aGsrUgIOoU6PtN_OS2ATZPNTZ_tXtaZEMNo-QYu4oAk,1737
41
+ resources/queries/q36.sql,sha256=w3jE1d_DD_c84WC6rTDSRcJy-BZuiDyawBn5xF7ZqRs,799
42
+ resources/queries/q37.sql,sha256=FAoOHxwZhjGlumpQF5x4oX9y8g99RPJ4zRckosY0eo8,510
43
+ resources/queries/q38.sql,sha256=dPDU6jQJ_I6fpiqC0ns_bfDTunTcmB1fDvBD5Vti1Wo,907
44
+ resources/queries/q39.sql,sha256=0PdwZt60nE1wl7ZXHsRM9m80HYHg4G9PouKPscGJaEg,2463
45
+ resources/queries/q39a.sql,sha256=HtqWU4iYoPYHVKQpOiAahAd26prdELGJrXGzu6LtToE,1208
46
+ resources/queries/q39b.sql,sha256=6M_l1lUbAc6XNhVsWEneT6y4KvrlHYrN5U1a_Y-erBs,1242
47
+ resources/queries/q4.sql,sha256=rWgvmSYKNqPHKljU-JBGvRMvO9mrAmq8UupA4RSbaik,4219
48
+ resources/queries/q40.sql,sha256=ALOaGtcjoJJYscwU9kGEmKizlQ802IYQQIR1pT10jfU,885
49
+ resources/queries/q41.sql,sha256=1quQOx8ZshgEfahNDIdZTLPS0vNO6XH9lZU6hPNwZoU,1907
50
+ resources/queries/q42.sql,sha256=p1wEaPLqJO6iKEjKqFwHyBUNv1HMX3Y0nblUzCeyODE,482
51
+ resources/queries/q43.sql,sha256=UJlnIZ8MykPJib4jIzIqxkDhnMLcotj2X29w_8bugxg,999
52
+ resources/queries/q44.sql,sha256=rbMjwL-6R3cdVTPbh-yczlyrXEdKPtnYR97NG2aT73Q,1623
53
+ resources/queries/q45.sql,sha256=tFAqAmqAzxK-UDZWboI5vOTkw7t-F-a7ZkzehBw_4z8,688
54
+ resources/queries/q46.sql,sha256=Mn9NhmHQj398ITp07iJ91U3CAq41Rbf4ywe0BeJVrMM,1273
55
+ resources/queries/q47.sql,sha256=H1vUjdw8BbXPC-dTssa_awWsWnUmsqdVjKovLBg1fY8,1787
56
+ resources/queries/q48.sql,sha256=TaB5mkKlGAUXzdFvxsV-K0A03UZ-qlzzgEiO3iqVZ7U,1212
57
+ resources/queries/q49.sql,sha256=TXNUlYPUhFW_dUvkXnyZLyTFRhTRjrxYglZaPpfMsfA,3749
58
+ resources/queries/q5.sql,sha256=PS2FUJQ48Rh0BX2xJ9m_imgv2ghhHtMp4CjMM28UKOg,3914
59
+ resources/queries/q50.sql,sha256=DfaGuneMmgzekbjEJtdeoTde-vy3urwlNV21ad6OJRU,1571
60
+ resources/queries/q51.sql,sha256=7DBWp5FmM79h_dcXNzl4eVQ3fqjNfXJeCyZRWk23ic0,1650
61
+ resources/queries/q52.sql,sha256=6Jl4XhoruiW1CDTiWH5RsWC-XNJH4boDAlgjDX9FyVY,454
62
+ resources/queries/q53.sql,sha256=avH82lc22lIqxzAVPI-JK3-7iaHs9QH76oNLNGCDGGY,1068
63
+ resources/queries/q54.sql,sha256=Rz9AYaKvZE-g3o14ncgFPqHcdwJ-4DJcBUKvDmCJmYw,1699
64
+ resources/queries/q55.sql,sha256=ScPPDeyBlddwFoIEu18Rm1BLu0zKZHhWcuW4gtucrUk,326
65
+ resources/queries/q56.sql,sha256=aJ0Db5J7FM0J-U-jOjNRf-duZPIJh9KC0gEBiN8SdnY,1851
66
+ resources/queries/q57.sql,sha256=0vtHOvQlk88ecte68krJ4-R1fzy9F7ZxJXoCXRcPF6I,1546
67
+ resources/queries/q58.sql,sha256=NehEN-eEUNXITW-Ka6THtWFoAcSDRkpGX36MdjG7GQ4,2333
68
+ resources/queries/q59.sql,sha256=SRIYDS0ks7O1rRV8kt51PZ57J6EvJfrzuToyZPbdL-4,2020
69
+ resources/queries/q6.sql,sha256=b97p0TEaC0fvbElwjS1mi1xiZ-OODLfH5c4G5jfKqsI,674
70
+ resources/queries/q60.sql,sha256=9l_Vdw2sjEweOj51k_YkWyyuvREraZuvvubgc7la95g,1836
71
+ resources/queries/q61.sql,sha256=KzLPq4vTFaXsGxRCa3zRzU-O14oiOK5urZ3nGpe-NbM,1254
72
+ resources/queries/q62.sql,sha256=ew5Re2cTHhjsT7qekV77dBFBqOaXy8wnESelFrS7Qcw,1163
73
+ resources/queries/q63.sql,sha256=Ji_bZ43Ol3g3JFI0v6i0cGT3ARE_qu7fYFfrw7tGYug,1272
74
+ resources/queries/q64.sql,sha256=ayb_luuiJLLWP2O_o0O0vQaOdKILqm6fAoSA6qK40gU,3548
75
+ resources/queries/q65.sql,sha256=B7FBYsTU9SG_ow0mtFsT0bhvq70JxcVINMExOe4lWy0,854
76
+ resources/queries/q66.sql,sha256=o8z9wkhIL3NVJPNrmdoT9x1vZPn60Fp3TQjdSP5dL5o,7376
77
+ resources/queries/q67.sql,sha256=9mxTsb4Ggqj8P3CY8vp8KjOAhqk6vzRv-r7w7DFEy3Q,1173
78
+ resources/queries/q68.sql,sha256=Rc8WuuOE8_wR-ZDgGfQUCkvlP9EJAfAaXht5EJO2moo,1427
79
+ resources/queries/q69.sql,sha256=fv3YiVDnNpUgZVvqThH3SXuGPqj2Fhqf41CQ5o-98tk,1400
80
+ resources/queries/q7.sql,sha256=nx9_RD27ZRwVL9ft4XlmqHi8_fzHKkBk-oeazzMIbgM,621
81
+ resources/queries/q70.sql,sha256=FvfNqjqzjzi01T42OcHwjNy79IoJ4uLgxqqkJ-TQfrs,1144
82
+ resources/queries/q71.sql,sha256=qANpSw4VQ8hnUsAnUhybRxJogQqihr1pEe25-RoefTA,1550
83
+ resources/queries/q72.sql,sha256=DhcRW2Ebkh6M0kK58WdBFuXb2WrovwqejOPSMC1Z9jc,1166
84
+ resources/queries/q73.sql,sha256=dFLIGHVc8atujUtq6ytTks6wcoexqcSbfq6j65iL3FQ,1189
85
+ resources/queries/q74.sql,sha256=dc_vfeCoiyZAiVNaG5eZAYyHve8bxzCEM9Ct6yOWgoM,2037
86
+ resources/queries/q75.sql,sha256=JfB2Yb48ztnJ2Zzu7Olt0txOsGyO_LxZepwV-LpNw_w,3091
87
+ resources/queries/q76.sql,sha256=qRyV06LwHCAYvsbuKlfsG7uSDgMcEib-xLnLbeyyYYM,1143
88
+ resources/queries/q77.sql,sha256=m2gIbfiJQfQ_SI_KLboXHG9FCoqkex6LjRbox61-TIg,3103
89
+ resources/queries/q78.sql,sha256=U6-F4qWCjAEVbWITwscA5e8AYHp2oTHI8l1Hz-waDJQ,2103
90
+ resources/queries/q79.sql,sha256=LLlvDkG9pffqYG_FI9gIPC8Cp0A6pQhA0i4397ArsiM,915
91
+ resources/queries/q8.sql,sha256=fGlXIjsNORZ2Poh3-WOlyTs4Yev0SWgdCwO6vRtvx6M,6117
92
+ resources/queries/q80.sql,sha256=BoERhyoZQkF324dFTtXWQ1FbGpgECgQkasCN49ml9l8,2904
93
+ resources/queries/q81.sql,sha256=W6XT1bwR7iWV9H1tiE_K4I0hUJ8aiVy0kVW50hkDaaE,1281
94
+ resources/queries/q82.sql,sha256=mzPia-WUsBckcIawfTmbT4NGZcKxQhWAsfvUdtPjq-I,506
95
+ resources/queries/q83.sql,sha256=sjsx6E68g5YjZY0bkNyl2JOkMUTavse_-GejT0h1k9Q,1772
96
+ resources/queries/q84.sql,sha256=pqcU2aZ3Skc4wNCGZvF5ND54z4c7Tc7qaVodpkuIttg,617
97
+ resources/queries/q85.sql,sha256=5y10U9-sB_ytpACwnCZeEwLz3IfwnMAQ6I-SRPu1JA8,2039
98
+ resources/queries/q86.sql,sha256=pByvmQ3HIKSGnsYmfQS9rhcAk6yIxy4Z_njvm72kQFU,649
99
+ resources/queries/q87.sql,sha256=a8-79rL4_UDUiauGQCN6ZRE8L_fMwNo_dM9OWz3X33U,920
100
+ resources/queries/q88.sql,sha256=L99dHSuf2XuR_KYDMthldMVNVM7NZgAm1Y4gY7wy_OI,5150
101
+ resources/queries/q89.sql,sha256=HM_0TSFZW6Vzae_7jnA_70vXgjNGrjST-9PuEynuAJI,991
102
+ resources/queries/q9.sql,sha256=zhCamHClUEJeaXhFJOL9WCYtz2OpkDjA0r0fTyCVHJY,2147
103
+ resources/queries/q90.sql,sha256=PXUiDx_Ho2J74NNFd1ty4PmnUZi_6KKohPi1fd-L414,979
104
+ resources/queries/q91.sql,sha256=N2hIFKX8obNiPvlG2e7G5hJAXZ5gVvFD2jvTIm5Yemk,1089
105
+ resources/queries/q92.sql,sha256=6buORPAAzk8N251ABwk-IxrZr4fziZ0fez1WOrIVt0E,719
106
+ resources/queries/q93.sql,sha256=StkuqrAWnZ85JjPaD6IhGOfC7TPN1-rzk9KPNT5fhcg,798
107
+ resources/queries/q94.sql,sha256=PtxIr05pAeixCEFelSJoVL4eHOE8hOZSaIQgCK7S4yI,839
108
+ resources/queries/q95.sql,sha256=RNQPYyZIwNz2gdQ9JQtuDHkz_6ekJ6zGAe0xhPcnSww,1034
109
+ resources/queries/q96.sql,sha256=E4SiwqMu0caEc7NSw_lS0Tdsn6PFwF9JkMjvV9GzdKg,407
110
+ resources/queries/q97.sql,sha256=6LhpRiUg_nyLXSwajoTO-gIqypcILpvtgam00qPniYM,958
111
+ resources/queries/q98.sql,sha256=xAR1tfC6LloysNrfg9vFWmr7TZGzicMZv09Wxk1DY1w,737
112
+ resources/queries/q99.sql,sha256=PQqOgSeqjOjAqn8g9jLpSG73Jj014xoHDdJchW8d7uo,1174
113
+ databricks_tpcds-0.1.0.dist-info/METADATA,sha256=vjLDTYCkRC1aL08QhBNisfDwnevBufIiQBVgwn3DJ0M,3649
114
+ databricks_tpcds-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
115
+ databricks_tpcds-0.1.0.dist-info/top_level.txt,sha256=eCGMT_zAX9GV67BgZU59Ha2zM560Y1TWU9D-O7uREfw,27
116
+ databricks_tpcds-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ databricks_tpcds
2
+ resources
resources/__init__.py ADDED
File without changes
@@ -0,0 +1,2 @@
1
+ --TPC-DS Q0: returns 10 rows from call_center; presumably a quick sanity check rather than a benchmark query (TODO confirm)
2
+ select * from call_center limit 10;
@@ -0,0 +1,24 @@
1
+ --TPC-DS Q1: customers whose summed 1999 store returns (SR_RETURN_AMT_INC_TAX) at a store in state 'TN' exceed 1.2x the average per-customer total for that store; first 100 ordered by c_customer_id
2
+ with customer_total_return as
3
+ (select sr_customer_sk as ctr_customer_sk
4
+ ,sr_store_sk as ctr_store_sk
5
+ ,sum(SR_RETURN_AMT_INC_TAX) as ctr_total_return
6
+ from store_returns
7
+ ,date_dim
8
+ where sr_returned_date_sk = d_date_sk
9
+ and d_year =1999
10
+ group by sr_customer_sk
11
+ ,sr_store_sk)
12
+ select c_customer_id
13
+ from customer_total_return ctr1
14
+ ,store
15
+ ,customer
16
+ where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
17
+ from customer_total_return ctr2
18
+ where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
19
+ and s_store_sk = ctr1.ctr_store_sk
20
+ and s_state = 'TN'
21
+ and ctr1.ctr_customer_sk = c_customer_sk
22
+ order by c_customer_id
23
+ limit 100;
24
+
@@ -0,0 +1,58 @@
1
+ --TPC-DS Q10: counts customers per demographic attribute combination, restricted to five listed counties and to customers with a store sale AND a web or catalog sale in 2002 with d_moy between 3 and 6; first 100 groups
2
+ select
3
+ cd_gender,
4
+ cd_marital_status,
5
+ cd_education_status,
6
+ count(*) cnt1,
7
+ cd_purchase_estimate,
8
+ count(*) cnt2,
9
+ cd_credit_rating,
10
+ count(*) cnt3,
11
+ cd_dep_count,
12
+ count(*) cnt4,
13
+ cd_dep_employed_count,
14
+ count(*) cnt5,
15
+ cd_dep_college_count,
16
+ count(*) cnt6
17
+ from
18
+ customer c,customer_address ca,customer_demographics
19
+ where
20
+ c.c_current_addr_sk = ca.ca_address_sk and
21
+ ca_county in ('Clinton County','Platte County','Franklin County','Louisa County','Harmon County') and
22
+ cd_demo_sk = c.c_current_cdemo_sk and
23
+ exists (select *
24
+ from store_sales,date_dim
25
+ where c.c_customer_sk = ss_customer_sk and
26
+ ss_sold_date_sk = d_date_sk and
27
+ d_year = 2002 and
28
+ d_moy between 3 and 3+3) and
29
+ (exists (select *
30
+ from web_sales,date_dim
31
+ where c.c_customer_sk = ws_bill_customer_sk and
32
+ ws_sold_date_sk = d_date_sk and
33
+ d_year = 2002 and
34
+ d_moy between 3 ANd 3+3) or
35
+ exists (select *
36
+ from catalog_sales,date_dim
37
+ where c.c_customer_sk = cs_ship_customer_sk and
38
+ cs_sold_date_sk = d_date_sk and
39
+ d_year = 2002 and
40
+ d_moy between 3 and 3+3))
41
+ group by cd_gender,
42
+ cd_marital_status,
43
+ cd_education_status,
44
+ cd_purchase_estimate,
45
+ cd_credit_rating,
46
+ cd_dep_count,
47
+ cd_dep_employed_count,
48
+ cd_dep_college_count
49
+ order by cd_gender,
50
+ cd_marital_status,
51
+ cd_education_status,
52
+ cd_purchase_estimate,
53
+ cd_credit_rating,
54
+ cd_dep_count,
55
+ cd_dep_employed_count,
56
+ cd_dep_college_count
57
+ limit 100;
58
+
@@ -0,0 +1,80 @@
1
+ --TPC-DS Q11: customers whose web-sales total (ext_list_price - ext_discount_amt) grew from 1999 to 2000 by a larger ratio than their store-sales total; year_total holds per-customer, per-year sums tagged 's' (store) or 'w' (web); first 100 rows
2
+ with year_total as (
3
+ select c_customer_id customer_id
4
+ ,c_first_name customer_first_name
5
+ ,c_last_name customer_last_name
6
+ ,c_preferred_cust_flag customer_preferred_cust_flag
7
+ ,c_birth_country customer_birth_country
8
+ ,c_login customer_login
9
+ ,c_email_address customer_email_address
10
+ ,d_year dyear
11
+ ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total
12
+ ,'s' sale_type
13
+ from customer
14
+ ,store_sales
15
+ ,date_dim
16
+ where c_customer_sk = ss_customer_sk
17
+ and ss_sold_date_sk = d_date_sk
18
+ group by c_customer_id
19
+ ,c_first_name
20
+ ,c_last_name
21
+ ,c_preferred_cust_flag
22
+ ,c_birth_country
23
+ ,c_login
24
+ ,c_email_address
25
+ ,d_year
26
+ union all
27
+ select c_customer_id customer_id
28
+ ,c_first_name customer_first_name
29
+ ,c_last_name customer_last_name
30
+ ,c_preferred_cust_flag customer_preferred_cust_flag
31
+ ,c_birth_country customer_birth_country
32
+ ,c_login customer_login
33
+ ,c_email_address customer_email_address
34
+ ,d_year dyear
35
+ ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total
36
+ ,'w' sale_type
37
+ from customer
38
+ ,web_sales
39
+ ,date_dim
40
+ where c_customer_sk = ws_bill_customer_sk
41
+ and ws_sold_date_sk = d_date_sk
42
+ group by c_customer_id
43
+ ,c_first_name
44
+ ,c_last_name
45
+ ,c_preferred_cust_flag
46
+ ,c_birth_country
47
+ ,c_login
48
+ ,c_email_address
49
+ ,d_year
50
+ )
51
+ select
52
+ t_s_secyear.customer_id
53
+ ,t_s_secyear.customer_first_name
54
+ ,t_s_secyear.customer_last_name
55
+ ,t_s_secyear.customer_email_address
56
+ from year_total t_s_firstyear
57
+ ,year_total t_s_secyear
58
+ ,year_total t_w_firstyear
59
+ ,year_total t_w_secyear
60
+ where t_s_secyear.customer_id = t_s_firstyear.customer_id
61
+ and t_s_firstyear.customer_id = t_w_secyear.customer_id
62
+ and t_s_firstyear.customer_id = t_w_firstyear.customer_id
63
+ and t_s_firstyear.sale_type = 's'
64
+ and t_w_firstyear.sale_type = 'w'
65
+ and t_s_secyear.sale_type = 's'
66
+ and t_w_secyear.sale_type = 'w'
67
+ and t_s_firstyear.dyear = 1999
68
+ and t_s_secyear.dyear = 1999+1
69
+ and t_w_firstyear.dyear = 1999
70
+ and t_w_secyear.dyear = 1999+1
71
+ and t_s_firstyear.year_total > 0
72
+ and t_w_firstyear.year_total > 0
73
+ and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end
74
+ > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end
75
+ order by t_s_secyear.customer_id
76
+ ,t_s_secyear.customer_first_name
77
+ ,t_s_secyear.customer_last_name
78
+ ,t_s_secyear.customer_email_address
79
+ limit 100;
80
+
@@ -0,0 +1,33 @@
1
+ --TPC-DS Q12: web-sales revenue per item and its percentage of the revenue of its i_class, for categories Jewelry/Books/Women sold from 2002-03-22 through 30 days later; first 100 rows
2
+ select i_item_id
3
+ ,i_item_desc
4
+ ,i_category
5
+ ,i_class
6
+ ,i_current_price
7
+ ,sum(ws_ext_sales_price) as itemrevenue
8
+ ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over
9
+ (partition by i_class) as revenueratio
10
+ from
11
+ web_sales
12
+ ,item
13
+ ,date_dim
14
+ where
15
+ ws_item_sk = i_item_sk
16
+ and i_category in ('Jewelry', 'Books', 'Women')
17
+ and ws_sold_date_sk = d_date_sk
18
+ and d_date between cast('2002-03-22' as date)
19
+ and date_add(cast('2002-03-22' as date),30)
20
+ group by
21
+ i_item_id
22
+ ,i_item_desc
23
+ ,i_category
24
+ ,i_class
25
+ ,i_current_price
26
+ order by
27
+ i_category
28
+ ,i_class
29
+ ,i_item_id
30
+ ,i_item_desc
31
+ ,revenueratio
32
+ limit 100;
33
+
@@ -0,0 +1,51 @@
1
+ --TPC-DS Q13: average quantity, average ext sales price, average and summed ext wholesale cost of 2001 store sales that match one of three demographic/sales-price bands AND one of three US state/net-profit bands
2
+ select avg(ss_quantity)
3
+ ,avg(ss_ext_sales_price)
4
+ ,avg(ss_ext_wholesale_cost)
5
+ ,sum(ss_ext_wholesale_cost)
6
+ from store_sales
7
+ ,store
8
+ ,customer_demographics
9
+ ,household_demographics
10
+ ,customer_address
11
+ ,date_dim
12
+ where s_store_sk = ss_store_sk
13
+ and ss_sold_date_sk = d_date_sk and d_year = 2001
14
+ and((ss_hdemo_sk=hd_demo_sk
15
+ and cd_demo_sk = ss_cdemo_sk
16
+ and cd_marital_status = 'U'
17
+ and cd_education_status = '4 yr Degree'
18
+ and ss_sales_price between 100.00 and 150.00
19
+ and hd_dep_count = 3
20
+ )or
21
+ (ss_hdemo_sk=hd_demo_sk
22
+ and cd_demo_sk = ss_cdemo_sk
23
+ and cd_marital_status = 'S'
24
+ and cd_education_status = 'Unknown'
25
+ and ss_sales_price between 50.00 and 100.00
26
+ and hd_dep_count = 1
27
+ ) or
28
+ (ss_hdemo_sk=hd_demo_sk
29
+ and cd_demo_sk = ss_cdemo_sk
30
+ and cd_marital_status = 'D'
31
+ and cd_education_status = '2 yr Degree'
32
+ and ss_sales_price between 150.00 and 200.00
33
+ and hd_dep_count = 1
34
+ ))
35
+ and((ss_addr_sk = ca_address_sk
36
+ and ca_country = 'United States'
37
+ and ca_state in ('CO', 'MI', 'MN')
38
+ and ss_net_profit between 100 and 200
39
+ ) or
40
+ (ss_addr_sk = ca_address_sk
41
+ and ca_country = 'United States'
42
+ and ca_state in ('NC', 'NY', 'TX')
43
+ and ss_net_profit between 150 and 300
44
+ ) or
45
+ (ss_addr_sk = ca_address_sk
46
+ and ca_country = 'United States'
47
+ and ca_state in ('CA', 'NE', 'TN')
48
+ and ss_net_profit between 50 and 250
49
+ ))
50
+ ;
51
+