clope 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clope-0.1.2/LICENSE +24 -0
- clope-0.1.2/MANIFEST.in +3 -0
- clope-0.1.2/PKG-INFO +132 -0
- clope-0.1.2/README.md +113 -0
- clope-0.1.2/clope/__init__.py +1 -0
- clope-0.1.2/clope/clope.py +15 -0
- clope-0.1.2/clope/snow/__init__.py +1 -0
- clope-0.1.2/clope/snow/connection_handling.py +31 -0
- clope-0.1.2/clope/snow/dates.py +120 -0
- clope-0.1.2/clope/snow/dimensions.py +337 -0
- clope-0.1.2/clope/snow/facts.py +859 -0
- clope-0.1.2/clope/snow/functions.py +6 -0
- clope-0.1.2/clope/spotlight/__init__.py +1 -0
- clope-0.1.2/clope/spotlight/spotlight.py +102 -0
- clope-0.1.2/clope.egg-info/PKG-INFO +132 -0
- clope-0.1.2/clope.egg-info/SOURCES.txt +20 -0
- clope-0.1.2/clope.egg-info/dependency_links.txt +1 -0
- clope-0.1.2/clope.egg-info/requires.txt +5 -0
- clope-0.1.2/clope.egg-info/top_level.txt +1 -0
- clope-0.1.2/requirements.txt +5 -0
- clope-0.1.2/setup.cfg +4 -0
- clope-0.1.2/setup.py +26 -0
clope-0.1.2/LICENSE
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
This is free and unencumbered software released into the public domain.
|
|
2
|
+
|
|
3
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
|
4
|
+
distribute this software, either in source code form or as a compiled
|
|
5
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
|
6
|
+
means.
|
|
7
|
+
|
|
8
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
|
9
|
+
of this software dedicate any and all copyright interest in the
|
|
10
|
+
software to the public domain. We make this dedication for the benefit
|
|
11
|
+
of the public at large and to the detriment of our heirs and
|
|
12
|
+
successors. We intend this dedication to be an overt act of
|
|
13
|
+
relinquishment in perpetuity of all present and future rights to this
|
|
14
|
+
software under copyright law.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
19
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
20
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
21
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
|
23
|
+
|
|
24
|
+
For more information, please refer to <https://unlicense.org>
|
clope-0.1.2/MANIFEST.in
ADDED
clope-0.1.2/PKG-INFO
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: clope
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Python package for interacting with the Cantaloupe/Seed vending system. Primarily the Spotlight API.
|
|
5
|
+
Home-page: https://github.com/pepsimidamerica/clope
|
|
6
|
+
Author: Jordan Maynor
|
|
7
|
+
Author-email: jmaynor@pepsimidamerica.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: The Unlicense (Unlicense)
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: requests
|
|
15
|
+
Requires-Dist: pandas==2.2.2
|
|
16
|
+
Requires-Dist: pyarrow==17.0.0
|
|
17
|
+
Requires-Dist: openpyxl==3.1.5
|
|
18
|
+
Requires-Dist: snowflake-connector-python[pandas]==3.11.0
|
|
19
|
+
|
|
20
|
+
# Overview
|
|
21
|
+
|
|
22
|
+
clope (see-lope) is a Python package for interacting with the Cantaloupe/Seed vending system. Primarily being a wrapper for their Spotlight API. It uses the pandas library to return information from a given spotlight report as a dataframe object. clope also has functionality for connecting to the snowflake data warehouse Cantaloupe product as well.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
Haven't yet bothered to publish as a python package, intent is to simply add clope as a git submodule in any projects where it's needed.
|
|
27
|
+
|
|
28
|
+
## Usage
|
|
29
|
+
|
|
30
|
+
Several environment variables are required for clope to function. Functionality is divided into two modules, so vars are only required if you are using functions from that particular module.
|
|
31
|
+
|
|
32
|
+
| Module | Required? | Env Variable | Description |
|
|
33
|
+
| --------- | --------- | ------------ | ----------- |
|
|
34
|
+
| Spotlight | Yes | CLO_USERNAME | Username of the Spotlight API user. Should be provided by Cantaloupe. |
|
|
35
|
+
| Spotlight | Yes | CLO_PASSWORD | Password of the Spotlight API user. Should be provided by Cantaloupe. |
|
|
36
|
+
| Spotlight | No | CLO_BASE_URL | Not actually sure if this varies between clients. I have this as an optional variable in case it does. Default value if no env variable is <https://api.mycantaloupe.com>, otherwise can be overridden. |
|
|
37
|
+
| Spotlight | No | CLO_ARCHIVE_FILES | Optional variable. Will archive the interim excel files that run_report() generates so can be later looked at in the Archive folder. Default behavior is to not archive and simply delete the excel files after data is pulled from them. |
|
|
38
|
+
| Snowflake | Yes | SNOWFLAKE_USER | Username of the Snowflake user |
|
|
39
|
+
| Snowflake | Yes | SNOWFLAKE_PASSWORD | Password of the snowflake user |
|
|
40
|
+
| Snowflake | Yes | SNOWFLAKE_ACCOUNT | Snowflake account you're connecting to. Should be something along the lines of "{Cantaloupe account}-{Your Company Name}" |
|
|
41
|
+
| Snowflake | Yes | SNOWFLAKE_DATABASE | Snowflake database to connect to. Likely begins with "PRD_SEED...". |
| Snowflake | Yes | SNOWFLAKE_WAREHOUSE | Snowflake warehouse to use when running queries. |
|
|
42
|
+
|
|
43
|
+
## Spotlight
|
|
44
|
+
|
|
45
|
+
The spotlight module involves interaction with the Cantaloupe Spotlight API. The API essentially allows remotely running a spotlight report and getting the raw excel data via HTTP requests. Reports must be set up in the browser prior to using the API. Fairly quick and suited for getting data that needs to be up-to-date at that moment.
|
|
46
|
+
|
|
47
|
+
### Run Spotlight Report (run_report())
|
|
48
|
+
|
|
49
|
+
The primary function. Used to run a spotlight report, retrieve the excel results, and transform the excel file into a workable pandas dataframe. Cantaloupe's spotlight reports return an excel file with two tabs: Report and Stats. This pulls the info from the Report tab, Stats is ignored.
|
|
50
|
+
|
|
51
|
+
> Note: Make sure your spotlight report has been shared with the "Seed Spotlight API Users" security group in Seed Office. Won't be accessible otherwise.
|
|
52
|
+
|
|
53
|
+
Takes in two parameters:
|
|
54
|
+
|
|
55
|
+
*report_id*
|
|
56
|
+
|
|
57
|
+
A string ID for the report in Cantaloupe. When logged into Seed Office, the report ID can be found in the URL. E.G. <https://mycantaloupe.com/cs3/ReportsEdit/Run?ReportId=XXXXX>, XXXXX being the report ID needed.
|
|
58
|
+
|
|
59
|
+
*params*
|
|
60
|
+
|
|
61
|
+
Optional parameter, list of tuples of strings. Some Spotlight reports have required filters which must be supplied to get data back. Date ranges being a common one. Cantaloupe's error messages are fairly clear, in my experience, with telling you what parameters are needed to run the report and in what format they should be. First element of tuple is filter name and second is filter value. Filter names are in format of "filter0", "filter1", "filter2", etc.
|
|
62
|
+
|
|
63
|
+
Example call
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
# Import package
|
|
67
|
+
from clope import run_report
|
|
68
|
+
|
|
69
|
+
# Run report with a report_id and additional parameters
|
|
70
|
+
df_report = run_report('123', [('filter0', '2024-01-01'), ('filter0', '2024-01-31')])
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Snowflake
|
|
74
|
+
|
|
75
|
+
Cantaloupe also offers a data warehouse product in Snowflake. Good for aggregating lots of information, as well as pulling historical information. However, notably, data is only pushed from Seed into the Snowflake data warehouse once a day, so it is not necessarily going to be accurate as of that moment.
|
|
76
|
+
|
|
77
|
+
Also something to keep in mind is that the system makes use of SCD (slowly changing dimension) in order to keep track of historical info vs current info. So some care should be taken when interpreting the data.
|
|
78
|
+
|
|
79
|
+
For each dataset that uses SCD, a parameter has been included to restrict to current data only or include all data.
|
|
80
|
+
|
|
81
|
+
### Dates
|
|
82
|
+
|
|
83
|
+
In Snowflake, most date columns are represented by an integer key, rather than the date itself. A couple functions are included with regards to dates. If working directly with Snowflake, you would join the date table onto the fact table you're working with. However, from what I can see the dates are largely deterministic. 1 is 1900-01-01, 2 is 1900-01-02. So I just directly translate from key to date and vice versa with some date math. Much quicker and should give same results as querying the date table itself.
|
|
84
|
+
|
|
85
|
+
### Dimensions
|
|
86
|
+
|
|
87
|
+
Dimensions describe facts. The location something happened in. The route it happened on. Dimensions generally change over time and make the most use of the SCD schema.
|
|
88
|
+
|
|
89
|
+
- Barcodes (for each pack)
|
|
90
|
+
- Branches
|
|
91
|
+
- Coils (planogram slots)
|
|
92
|
+
- Customers
|
|
93
|
+
- Devices (telemetry)
|
|
94
|
+
- Item Packs (UOMs)
|
|
95
|
+
- Items
|
|
96
|
+
- Lines of Business
|
|
97
|
+
- Locations
|
|
98
|
+
- Machines
|
|
99
|
+
- Micromarkets
|
|
100
|
+
- Operators
|
|
101
|
+
- Routes
|
|
102
|
+
- Supplier Branch
|
|
103
|
+
- Supplier Items (Not yet used seemingly)
|
|
104
|
+
- Suppliers
|
|
105
|
+
- Warehouses
|
|
106
|
+
- Machine Alerts
|
|
107
|
+
|
|
108
|
+
### Facts
|
|
109
|
+
|
|
110
|
+
A fact is the central information being stored. Generally, things that are not changing. A sale, an inventory, a product movement.
|
|
111
|
+
|
|
112
|
+
- Cashless Vending Transaction
|
|
113
|
+
- Collection Micromarket Sales
|
|
114
|
+
- Order to Fulfillment (Delivery)
|
|
115
|
+
- Order to Fulfillment (Vending and Micromarket)
|
|
116
|
+
- Delivery Order Receive
|
|
117
|
+
- Sales Revenue By Day
|
|
118
|
+
- Sales Revenue By Visit
|
|
119
|
+
- Sales By Coil
|
|
120
|
+
- Scheduling Machine
|
|
121
|
+
- Scheduling Route Summary
|
|
122
|
+
- Telemetry Sales
|
|
123
|
+
- Vending Micromarket Visit
|
|
124
|
+
- Warehouse Inventory
|
|
125
|
+
- Warehouse Observed Inventory
|
|
126
|
+
- Warehouse Product Movement
|
|
127
|
+
- Warehouse Purchase
|
|
128
|
+
- Warehouse Receive
|
|
129
|
+
|
|
130
|
+
### Functions
|
|
131
|
+
|
|
132
|
+
Also included in Cantaloupe's Snowflake are a couple functions. General intention seems to be gathering a subset of data from a couple core fact tables. Haven't yet implemented wrappers for these.
|
clope-0.1.2/README.md
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
# Overview
|
|
2
|
+
|
|
3
|
+
clope (see-lope) is a Python package for interacting with the Cantaloupe/Seed vending system. Primarily being a wrapper for their Spotlight API. It uses the pandas library to return information from a given spotlight report as a dataframe object. clope also has functionality for connecting to the snowflake data warehouse Cantaloupe product as well.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Haven't yet bothered to publish as a python package, intent is to simply add clope as a git submodule in any projects where it's needed.
|
|
8
|
+
|
|
9
|
+
## Usage
|
|
10
|
+
|
|
11
|
+
Several environment variables are required for clope to function. Functionality is divided into two modules, so vars are only required if you are using functions from that particular module.
|
|
12
|
+
|
|
13
|
+
| Module | Required? | Env Variable | Description |
|
|
14
|
+
| --------- | --------- | ------------ | ----------- |
|
|
15
|
+
| Spotlight | Yes | CLO_USERNAME | Username of the Spotlight API user. Should be provided by Cantaloupe. |
|
|
16
|
+
| Spotlight | Yes | CLO_PASSWORD | Password of the Spotlight API user. Should be provided by Cantaloupe. |
|
|
17
|
+
| Spotlight | No | CLO_BASE_URL | Not actually sure if this varies between clients. I have this as an optional variable in case it does. Default value if no env variable is <https://api.mycantaloupe.com>, otherwise can be overridden. |
|
|
18
|
+
| Spotlight | No | CLO_ARCHIVE_FILES | Optional variable. Will archive the interim excel files that run_report() generates so can be later looked at in the Archive folder. Default behavior is to not archive and simply delete the excel files after data is pulled from them. |
|
|
19
|
+
| Snowflake | Yes | SNOWFLAKE_USER | Username of the Snowflake user |
|
|
20
|
+
| Snowflake | Yes | SNOWFLAKE_PASSWORD | Password of the snowflake user |
|
|
21
|
+
| Snowflake | Yes | SNOWFLAKE_ACCOUNT | Snowflake account you're connecting to. Should be something along the lines of "{Cantaloupe account}-{Your Company Name}" |
|
|
22
|
+
| Snowflake | Yes | SNOWFLAKE_DATABASE | Snowflake database to connect to. Likely begins with "PRD_SEED...". |
| Snowflake | Yes | SNOWFLAKE_WAREHOUSE | Snowflake warehouse to use when running queries. |
|
|
23
|
+
|
|
24
|
+
## Spotlight
|
|
25
|
+
|
|
26
|
+
The spotlight module involves interaction with the Cantaloupe Spotlight API. The API essentially allows remotely running a spotlight report and getting the raw excel data via HTTP requests. Reports must be set up in the browser prior to using the API. Fairly quick and suited for getting data that needs to be up-to-date at that moment.
|
|
27
|
+
|
|
28
|
+
### Run Spotlight Report (run_report())
|
|
29
|
+
|
|
30
|
+
The primary function. Used to run a spotlight report, retrieve the excel results, and transform the excel file into a workable pandas dataframe. Cantaloupe's spotlight reports return an excel file with two tabs: Report and Stats. This pulls the info from the Report tab, Stats is ignored.
|
|
31
|
+
|
|
32
|
+
> Note: Make sure your spotlight report has been shared with the "Seed Spotlight API Users" security group in Seed Office. Won't be accessible otherwise.
|
|
33
|
+
|
|
34
|
+
Takes in two parameters:
|
|
35
|
+
|
|
36
|
+
*report_id*
|
|
37
|
+
|
|
38
|
+
A string ID for the report in Cantaloupe. When logged into Seed Office, the report ID can be found in the URL. E.G. <https://mycantaloupe.com/cs3/ReportsEdit/Run?ReportId=XXXXX>, XXXXX being the report ID needed.
|
|
39
|
+
|
|
40
|
+
*params*
|
|
41
|
+
|
|
42
|
+
Optional parameter, list of tuples of strings. Some Spotlight reports have required filters which must be supplied to get data back. Date ranges being a common one. Cantaloupe's error messages are fairly clear, in my experience, with telling you what parameters are needed to run the report and in what format they should be. First element of tuple is filter name and second is filter value. Filter names are in format of "filter0", "filter1", "filter2", etc.
|
|
43
|
+
|
|
44
|
+
Example call
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
# Import package
|
|
48
|
+
from clope import run_report
|
|
49
|
+
|
|
50
|
+
# Run report with a report_id and additional parameters
|
|
51
|
+
df_report = run_report('123', [('filter0', '2024-01-01'), ('filter0', '2024-01-31')])
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Snowflake
|
|
55
|
+
|
|
56
|
+
Cantaloupe also offers a data warehouse product in Snowflake. Good for aggregating lots of information, as well as pulling historical information. However, notably, data is only pushed from Seed into the Snowflake data warehouse once a day, so it is not necessarily going to be accurate as of that moment.
|
|
57
|
+
|
|
58
|
+
Also something to keep in mind is that the system makes use of SCD (slowly changing dimension) in order to keep track of historical info vs current info. So some care should be taken when interpreting the data.
|
|
59
|
+
|
|
60
|
+
For each dataset that uses SCD, a parameter has been included to restrict to current data only or include all data.
|
|
61
|
+
|
|
62
|
+
### Dates
|
|
63
|
+
|
|
64
|
+
In Snowflake, most date columns are represented by an integer key, rather than the date itself. A couple functions are included with regards to dates. If working directly with Snowflake, you would join the date table onto the fact table you're working with. However, from what I can see the dates are largely deterministic. 1 is 1900-01-01, 2 is 1900-01-02. So I just directly translate from key to date and vice versa with some date math. Much quicker and should give same results as querying the date table itself.
|
|
65
|
+
|
|
66
|
+
### Dimensions
|
|
67
|
+
|
|
68
|
+
Dimensions describe facts. The location something happened in. The route it happened on. Dimensions generally change over time and make the most use of the SCD schema.
|
|
69
|
+
|
|
70
|
+
- Barcodes (for each pack)
|
|
71
|
+
- Branches
|
|
72
|
+
- Coils (planogram slots)
|
|
73
|
+
- Customers
|
|
74
|
+
- Devices (telemetry)
|
|
75
|
+
- Item Packs (UOMs)
|
|
76
|
+
- Items
|
|
77
|
+
- Lines of Business
|
|
78
|
+
- Locations
|
|
79
|
+
- Machines
|
|
80
|
+
- Micromarkets
|
|
81
|
+
- Operators
|
|
82
|
+
- Routes
|
|
83
|
+
- Supplier Branch
|
|
84
|
+
- Supplier Items (Not yet used seemingly)
|
|
85
|
+
- Suppliers
|
|
86
|
+
- Warehouses
|
|
87
|
+
- Machine Alerts
|
|
88
|
+
|
|
89
|
+
### Facts
|
|
90
|
+
|
|
91
|
+
A fact is the central information being stored. Generally, things that are not changing. A sale, an inventory, a product movement.
|
|
92
|
+
|
|
93
|
+
- Cashless Vending Transaction
|
|
94
|
+
- Collection Micromarket Sales
|
|
95
|
+
- Order to Fulfillment (Delivery)
|
|
96
|
+
- Order to Fulfillment (Vending and Micromarket)
|
|
97
|
+
- Delivery Order Receive
|
|
98
|
+
- Sales Revenue By Day
|
|
99
|
+
- Sales Revenue By Visit
|
|
100
|
+
- Sales By Coil
|
|
101
|
+
- Scheduling Machine
|
|
102
|
+
- Scheduling Route Summary
|
|
103
|
+
- Telemetry Sales
|
|
104
|
+
- Vending Micromarket Visit
|
|
105
|
+
- Warehouse Inventory
|
|
106
|
+
- Warehouse Observed Inventory
|
|
107
|
+
- Warehouse Product Movement
|
|
108
|
+
- Warehouse Purchase
|
|
109
|
+
- Warehouse Receive
|
|
110
|
+
|
|
111
|
+
### Functions
|
|
112
|
+
|
|
113
|
+
Also included in Cantaloupe's Snowflake are a couple functions. General intention seems to be gathering a subset of data from a couple core fact tables. Haven't yet implemented wrappers for these.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .clope import *
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
clope is a package for pulling data from the Cantaloupe/Seed Office system.
|
|
3
|
+
Primarily via the Spotlight API.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .snow.dates import *
|
|
7
|
+
from .snow.dimensions import *
|
|
8
|
+
from .snow.facts import *
|
|
9
|
+
from .spotlight.spotlight import run_report
|
|
10
|
+
|
|
11
|
+
# Module can be executed directly, but no CLI behavior is defined yet;
# example usage would go here.
if __name__ == "__main__":
    pass
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Empty
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import snowflake.connector
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _get_snowflake_connection(
    schema: str = "PUBLIC",
) -> snowflake.connector.SnowflakeConnection:
    """
    Connect to Snowflake data warehouse using environment variables. By default,
    connects to the main PUBLIC schema. Can be overridden to connect to others.
    """
    # Validate up front so a missing credential fails fast with a clear
    # message instead of a connector error. Report the first one missing,
    # in declaration order.
    required = (
        "SNOWFLAKE_USER",
        "SNOWFLAKE_PASSWORD",
        "SNOWFLAKE_ACCOUNT",
        "SNOWFLAKE_WAREHOUSE",
        "SNOWFLAKE_DATABASE",
    )
    missing = [name for name in required if name not in os.environ]
    if missing:
        raise Exception(f"Missing required environment variable: {missing[0]}")

    return snowflake.connector.connect(
        user=os.environ["SNOWFLAKE_USER"],
        password=os.environ["SNOWFLAKE_PASSWORD"],
        account=os.environ["SNOWFLAKE_ACCOUNT"],
        warehouse=os.environ["SNOWFLAKE_WAREHOUSE"],
        database=os.environ["SNOWFLAKE_DATABASE"],
        schema=schema,
    )
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains functionality to translate between datekeys and dates.
|
|
3
|
+
|
|
4
|
+
Technically there are tables containing this data, but I don't think
|
|
5
|
+
it's necessary to actually query them. We can just calculate them directly.
|
|
6
|
+
Should be much quicker.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
|
|
11
|
+
import pandas
|
|
12
|
+
|
|
13
|
+
# from connection_handling import _get_snowflake_connection
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def datekey_to_date(datekey: int) -> datetime:
    """
    Convert a datekey to a datetime object.

    Datekey 1 corresponds to the 1900-01-01 epoch, so the result is
    simply (datekey - 1) days past that epoch.
    """
    return datetime(1900, 1, 1) + timedelta(days=datekey - 1)
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def date_to_datekey(date: datetime) -> int:
    """
    Convert a datetime object to a datekey.

    Inverse of datekey_to_date: 1900-01-01 maps to key 1.
    """
    epoch = datetime(1900, 1, 1)
    elapsed = date - epoch
    return elapsed.days + 1
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_datekey_range(start_date: datetime, end_date: datetime) -> list[int]:
    """
    Get a list of datekeys between two dates, inclusive of both endpoints.

    Computed arithmetically from the 1900-01-01 epoch instead of
    materializing a pandas date_range and converting each per-day
    Timestamp — consistent with this module's direct date-math approach
    and avoids an unnecessary pandas dependency for this function.

    Returns an empty list if end_date falls before start_date.
    """
    base_date = datetime(1900, 1, 1)
    start_key = (start_date - base_date).days + 1
    end_key = (end_date - base_date).days + 1
    return list(range(start_key, end_key + 1))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_date_range(start_datekey: int, end_datekey: int) -> list[datetime]:
    """
    Get a list of dates between two datekeys, inclusive of both endpoints.
    """
    epoch = datetime(1900, 1, 1)
    return [
        epoch + timedelta(days=key - 1)
        for key in range(start_datekey, end_datekey + 1)
    ]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def monthkey_to_date(monthkey: int) -> datetime:
    """
    Convert a monthkey to a datetime object representing the first day of the month.

    Monthkey 1 is January 1900; keys count months sequentially from there.
    """
    years_elapsed, month_index = divmod(monthkey - 1, 12)
    return datetime(1900 + years_elapsed, month_index + 1, 1)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def date_to_monthkey(date: datetime) -> int:
    """
    Convert a datetime object to a monthkey (January 1900 -> 1).
    """
    months_since_epoch = (date.year - 1900) * 12
    return months_since_epoch + date.month
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def quarterkey_to_date(quarterkey: int) -> datetime:
    """
    Convert a quarterkey to a datetime object representing the first day of the quarter.

    Quarterkey 1 is Q1 1900 (1900-01-01); keys count quarters sequentially.
    """
    years_elapsed, quarter_index = divmod(quarterkey - 1, 4)
    # Quarters start in months 1, 4, 7, 10.
    first_month = quarter_index * 3 + 1
    return datetime(1900 + years_elapsed, first_month, 1)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def date_to_quarterkey(date: datetime) -> int:
    """
    Convert a datetime object to a quarterkey (Q1 1900 -> 1).
    """
    # Months 1-3 -> quarter 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    quarter_of_year = (date.month + 2) // 3
    return (date.year - 1900) * 4 + quarter_of_year
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def yearkey_to_date(yearkey: int) -> datetime:
    """
    Convert a yearkey to a datetime object representing the first day of the year.

    Yearkey 1 is 1900, so the calendar year is 1899 + yearkey.
    """
    return datetime(1899 + yearkey, 1, 1)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def date_to_yearkey(date: datetime) -> int:
    """
    Convert a datetime object to a yearkey (1900 -> 1).
    """
    return date.year - 1899
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# Commented out in-progress code
|
|
96
|
+
# def date_dim(
|
|
97
|
+
# datekey: int | None = None, date: datetime | None = None
|
|
98
|
+
# ) -> pandas.DataFrame:
|
|
99
|
+
# """
|
|
100
|
+
# Retrieve corresponding datekey from a given date or vice versa.
|
|
101
|
+
# """
|
|
102
|
+
# if date and datekey:
|
|
103
|
+
# raise Exception("Only one of datekey or date must be provided")
|
|
104
|
+
# if not datekey and not date:
|
|
105
|
+
# raise Exception("Either datekey or date must be provided")
|
|
106
|
+
|
|
107
|
+
# conn = _get_snowflake_connection(schema="TIME_DIMENSION")
|
|
108
|
+
# try:
|
|
109
|
+
# if datekey:
|
|
110
|
+
# query = f"SELECT * FROM DATE_DIM WHERE DATEKEY = {datekey}"
|
|
111
|
+
# else:
|
|
112
|
+
# query = f"SELECT * FROM DATE_DIM WHERE DATE = '{date}'"
|
|
113
|
+
# cur = conn.cursor()
|
|
114
|
+
# cur.execute(query)
|
|
115
|
+
# df = cur.fetch_pandas_all()
|
|
116
|
+
# except Exception as e:
|
|
117
|
+
# raise Exception("Error reading Snowflake table", e)
|
|
118
|
+
# finally:
|
|
119
|
+
# conn.close()
|
|
120
|
+
# return df
|