clope 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
clope-0.1.2/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <https://unlicense.org>
@@ -0,0 +1,3 @@
1
+ include requirements.txt
2
+ include README.md
3
+ include LICENSE
clope-0.1.2/PKG-INFO ADDED
@@ -0,0 +1,132 @@
1
+ Metadata-Version: 2.1
2
+ Name: clope
3
+ Version: 0.1.2
4
+ Summary: Python package for interacting with the Cantaloupe/Seed vending system. Primarily the Spotlight API.
5
+ Home-page: https://github.com/pepsimidamerica/clope
6
+ Author: Jordan Maynor
7
+ Author-email: jmaynor@pepsimidamerica.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: The Unlicense (Unlicense)
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.12
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: requests
15
+ Requires-Dist: pandas==2.2.2
16
+ Requires-Dist: pyarrow==17.0.0
17
+ Requires-Dist: openpyxl==3.1.5
18
+ Requires-Dist: snowflake-connector-python[pandas]==3.11.0
19
+
20
+ # Overview
21
+
22
+ clope (see-lope) is a Python package for interacting with the Cantaloupe/Seed vending system. Primarily being a wrapper for their Spotlight API. It uses the pandas library to return information from a given spotlight report as a dataframe object. clope also has functionality for connecting to the snowflake data warehouse Cantaloupe product as well.
23
+
24
+ ## Installation
25
+
26
+ Haven't yet bothered to publish as a python package, intent is to simply add clope as a git submodule in any projects where it's needed.
27
+
28
+ ## Usage
29
+
30
+ Several environment variables are required for clope to function. Functionality is divided into two modules, so vars are only required if you are using functions from that particular module.
31
+
32
+ | Module | Required? | Env Variable | Description |
33
+ | --------- | --------- | ------------ | ----------- |
34
+ | Spotlight | Yes | CLO_USERNAME | Username of the Spotlight API user. Should be provided by Cantaloupe. |
35
+ | Spotlight | Yes | CLO_PASSWORD | Password of the Spotlight API user. Should be provided by Cantaloupe. |
36
+ | Spotlight | No | CLO_BASE_URL | Not actually sure if this varies between clients. I have this as an optional variable in case it does. Default value if no env variable is <https://api.mycantaloupe.com>, otherwise can be overridden. |
37
+ | Spotlight | No | CLO_ARCHIVE_FILES | Optional variable. Will archive the interim excel files that run_report() generates so can be later looked at in the Archive folder. Default behavior is to not archive and simply delete the excel files after data is pulled from them. |
38
+ | Snowflake | Yes | SNOWFLAKE_USER | Username of the Snowflake user |
39
+ | Snowflake | Yes | SNOWFLAKE_PASSWORD | Password of the snowflake user |
40
+ | Snowflake | Yes | SNOWFLAKE_ACCOUNT | Snowflake account you're connecting to. Should be something along the lines of "{Cantaloupe account}-{Your Company Name}" |
41
+ | Snowflake | Yes | SNOWFLAKE_DATABASE | Snowflake database to connect to. Likely begins with "PRD_SEED...". |
42
+
43
+ ## Spotlight
44
+
45
+ The spotlight module involves interaction with the Cantaloupe Spotlight API. The API essentially allows remotely running a spotlight report and getting the raw excel data via HTTP requests. Reports must be set up in the browser prior to using the API. Fairly quick and suited for getting data that needs to be up-to-date at that moment.
46
+
47
+ ### Run Spotlight Report (run_report())
48
+
49
+ The primary function. Used to run a spotlight report, retrieve the excel results, and transform the excel file into a workable pandas dataframe. Cantaloupe's spotlight reports return an excel file with two tabs: Report and Stats. This pulls the info from the Report tab, Stats is ignored.
50
+
51
+ > Note: Make sure your spotlight report has been shared with the "Seed Spotlight API Users" security group in Seed Office. Won't be accessible otherwise.
52
+
53
+ Takes in two parameters:
54
+
55
+ *report_id*
56
+
57
+ A string ID for the report in Cantaloupe. When logged into Seed Office, the report ID can be found in the URL. E.G. <https://mycantaloupe.com/cs3/ReportsEdit/Run?ReportId=XXXXX>, XXXXX being the report ID needed.
58
+
59
+ *params*
60
+
61
+ Optional parameter, list of tuples of strings. Some Spotlight reports have required filters which must be supplied to get data back. Date ranges being a common one. Cantaloupe's error messages are fairly clear, in my experience, with telling you what parameters are needed to run the report and in what format they should be. First element of tuple is filter name and second is filter value. Filter names are in format of "filter0", "filter1", "filter2", etc.
62
+
63
+ Example call
64
+
65
+ ```python
66
+ # Import package
67
+ from clope import run_report
68
+
69
+ # Run report with a report_id and additional parameters
70
+ df_report = run_report('123', [('filter0', '2024-01-01'), ('filter0', '2024-01-31')])
71
+ ```
72
+
73
+ ## Snowflake
74
+
75
+ Cantaloupe also offers a data warehouse product in Snowflake. Good for aggregating lots of information, as well as pulling historical information. However, notably, data is only pushed from Seed into the Snowflake data warehouse once a day, so it is not necessarily going to be accurate as of that moment.
76
+
77
+ Also something to keep in mind is that the system makes use of SCD (slowly changing dimension) in order to keep track of historical info vs current info. So some care should be taken when interpreting the data.
78
+
79
+ For each dataset that uses SCD, a parameter has been included to restrict to current data only or include all data.
80
+
81
+ ### Dates
82
+
83
+ In Snowflake, most date columns are represented by an integer key, rather than the date itself. A couple functions are included with regards to dates. If working directly with Snowflake, you would join the date table onto the fact table you're working with. However, from what I can see the dates are largely deterministic. 1 is 1900-01-01, 2 is 1900-01-02. So I just directly translate from key to date and vice versa with some date math. Much quicker and should give same results as querying the date table itself.
84
+
85
+ ### Dimensions
86
+
87
+ Dimensions describe facts. The location something happened in. The route it happened on. Dimensions generally change over time and make the most use of the SCD schema.
88
+
89
+ - Barcodes (for each pack)
90
+ - Branches
91
+ - Coils (planogram slots)
92
+ - Customers
93
+ - Devices (telemetry)
94
+ - Item Packs (UOMs)
95
+ - Items
96
+ - Lines of Business
97
+ - Locations
98
+ - Machines
99
+ - Micromarkets
100
+ - Operators
101
+ - Routes
102
+ - Supplier Branch
103
+ - Supplier Items (Not yet used seemingly)
104
+ - Suppliers
105
+ - Warehouses
106
+ - Machine Alerts
107
+
108
+ ### Facts
109
+
110
+ A fact is the central information being stored. Generally, things that are not changing. A sale, an inventory, a product movement.
111
+
112
+ - Cashless Vending Transaction
113
+ - Collection Micromarket Sales
114
+ - Order to Fulfillment (Delivery)
115
+ - Order to Fulfillment (Vending and Micromarket)
116
+ - Delivery Order Receive
117
+ - Sales Revenue By Day
118
+ - Sales Revenue By Visit
119
+ - Sales By Coil
120
+ - Scheduling Machine
121
+ - Scheduling Route Summary
122
+ - Telemetry Sales
123
+ - Vending Micromarket Visit
124
+ - Warehouse Inventory
125
+ - Warehouse Observed Inventory
126
+ - Warehouse Product Movement
127
+ - Warehouse Purchase
128
+ - Warehouse Receive
129
+
130
+ ### Functions
131
+
132
+ Also included in Cantaloupe's Snowflake are a couple functions. General intention seems to be gathering a subset of data from a couple core fact tables. Haven't yet implemented wrappers for these.
clope-0.1.2/README.md ADDED
@@ -0,0 +1,113 @@
1
+ # Overview
2
+
3
+ clope (see-lope) is a Python package for interacting with the Cantaloupe/Seed vending system. Primarily being a wrapper for their Spotlight API. It uses the pandas library to return information from a given spotlight report as a dataframe object. clope also has functionality for connecting to the snowflake data warehouse Cantaloupe product as well.
4
+
5
+ ## Installation
6
+
7
+ Haven't yet bothered to publish as a python package, intent is to simply add clope as a git submodule in any projects where it's needed.
8
+
9
+ ## Usage
10
+
11
+ Several environment variables are required for clope to function. Functionality is divided into two modules, so vars are only required if you are using functions from that particular module.
12
+
13
+ | Module | Required? | Env Variable | Description |
14
+ | --------- | --------- | ------------ | ----------- |
15
+ | Spotlight | Yes | CLO_USERNAME | Username of the Spotlight API user. Should be provided by Cantaloupe. |
16
+ | Spotlight | Yes | CLO_PASSWORD | Password of the Spotlight API user. Should be provided by Cantaloupe. |
17
+ | Spotlight | No | CLO_BASE_URL | Not actually sure if this varies between clients. I have this as an optional variable in case it does. Default value if no env variable is <https://api.mycantaloupe.com>, otherwise can be overridden. |
18
+ | Spotlight | No | CLO_ARCHIVE_FILES | Optional variable. Will archive the interim excel files that run_report() generates so can be later looked at in the Archive folder. Default behavior is to not archive and simply delete the excel files after data is pulled from them. |
19
+ | Snowflake | Yes | SNOWFLAKE_USER | Username of the Snowflake user |
20
+ | Snowflake | Yes | SNOWFLAKE_PASSWORD | Password of the snowflake user |
21
+ | Snowflake | Yes | SNOWFLAKE_ACCOUNT | Snowflake account you're connecting to. Should be something along the lines of "{Cantaloupe account}-{Your Company Name}" |
22
+ | Snowflake | Yes | SNOWFLAKE_DATABASE | Snowflake database to connect to. Likely begins with "PRD_SEED...". |
23
+
24
+ ## Spotlight
25
+
26
+ The spotlight module involves interaction with the Cantaloupe Spotlight API. The API essentially allows remotely running a spotlight report and getting the raw excel data via HTTP requests. Reports must be set up in the browser prior to using the API. Fairly quick and suited for getting data that needs to be up-to-date at that moment.
27
+
28
+ ### Run Spotlight Report (run_report())
29
+
30
+ The primary function. Used to run a spotlight report, retrieve the excel results, and transform the excel file into a workable pandas dataframe. Cantaloupe's spotlight reports return an excel file with two tabs: Report and Stats. This pulls the info from the Report tab, Stats is ignored.
31
+
32
+ > Note: Make sure your spotlight report has been shared with the "Seed Spotlight API Users" security group in Seed Office. Won't be accessible otherwise.
33
+
34
+ Takes in two parameters:
35
+
36
+ *report_id*
37
+
38
+ A string ID for the report in Cantaloupe. When logged into Seed Office, the report ID can be found in the URL. E.G. <https://mycantaloupe.com/cs3/ReportsEdit/Run?ReportId=XXXXX>, XXXXX being the report ID needed.
39
+
40
+ *params*
41
+
42
+ Optional parameter, list of tuples of strings. Some Spotlight reports have required filters which must be supplied to get data back. Date ranges being a common one. Cantaloupe's error messages are fairly clear, in my experience, with telling you what parameters are needed to run the report and in what format they should be. First element of tuple is filter name and second is filter value. Filter names are in format of "filter0", "filter1", "filter2", etc.
43
+
44
+ Example call
45
+
46
+ ```python
47
+ # Import package
48
+ from clope import run_report
49
+
50
+ # Run report with a report_id and additional parameters
51
+ df_report = run_report('123', [('filter0', '2024-01-01'), ('filter0', '2024-01-31')])
52
+ ```
53
+
54
+ ## Snowflake
55
+
56
+ Cantaloupe also offers a data warehouse product in Snowflake. Good for aggregating lots of information, as well as pulling historical information. However, notably, data is only pushed from Seed into the Snowflake data warehouse once a day, so it is not necessarily going to be accurate as of that moment.
57
+
58
+ Also something to keep in mind is that the system makes use of SCD (slowly changing dimension) in order to keep track of historical info vs current info. So some care should be taken when interpreting the data.
59
+
60
+ For each dataset that uses SCD, a parameter has been included to restrict to current data only or include all data.
61
+
62
+ ### Dates
63
+
64
+ In Snowflake, most date columns are represented by an integer key, rather than the date itself. A couple functions are included with regards to dates. If working directly with Snowflake, you would join the date table onto the fact table you're working with. However, from what I can see the dates are largely deterministic. 1 is 1900-01-01, 2 is 1900-01-02. So I just directly translate from key to date and vice versa with some date math. Much quicker and should give same results as querying the date table itself.
65
+
66
+ ### Dimensions
67
+
68
+ Dimensions describe facts. The location something happened in. The route it happened on. Dimensions generally change over time and make the most use of the SCD schema.
69
+
70
+ - Barcodes (for each pack)
71
+ - Branches
72
+ - Coils (planogram slots)
73
+ - Customers
74
+ - Devices (telemetry)
75
+ - Item Packs (UOMs)
76
+ - Items
77
+ - Lines of Business
78
+ - Locations
79
+ - Machines
80
+ - Micromarkets
81
+ - Operators
82
+ - Routes
83
+ - Supplier Branch
84
+ - Supplier Items (Not yet used seemingly)
85
+ - Suppliers
86
+ - Warehouses
87
+ - Machine Alerts
88
+
89
+ ### Facts
90
+
91
+ A fact is the central information being stored. Generally, things that are not changing. A sale, an inventory, a product movement.
92
+
93
+ - Cashless Vending Transaction
94
+ - Collection Micromarket Sales
95
+ - Order to Fulfillment (Delivery)
96
+ - Order to Fulfillment (Vending and Micromarket)
97
+ - Delivery Order Receive
98
+ - Sales Revenue By Day
99
+ - Sales Revenue By Visit
100
+ - Sales By Coil
101
+ - Scheduling Machine
102
+ - Scheduling Route Summary
103
+ - Telemetry Sales
104
+ - Vending Micromarket Visit
105
+ - Warehouse Inventory
106
+ - Warehouse Observed Inventory
107
+ - Warehouse Product Movement
108
+ - Warehouse Purchase
109
+ - Warehouse Receive
110
+
111
+ ### Functions
112
+
113
+ Also included in Cantaloupe's Snowflake are a couple functions. General intention seems to be gathering a subset of data from a couple core fact tables. Haven't yet implemented wrappers for these.
@@ -0,0 +1 @@
1
+ from .clope import *
@@ -0,0 +1,15 @@
1
+ """
2
+ clope is a package for pulling data from the Cantaloupe/Seed Office system.
3
+ Primarily via the Spotlight API.
4
+ """
5
+
6
+ from .snow.dates import *
7
+ from .snow.dimensions import *
8
+ from .snow.facts import *
9
+ from .spotlight.spotlight import run_report
10
+
11
if __name__ == "__main__":
    # Example usage placeholder: nothing is executed when this module is run
    # directly.
    pass
@@ -0,0 +1 @@
1
+ # Empty
@@ -0,0 +1,31 @@
1
+ import os
2
+
3
+ import snowflake.connector
4
+
5
+
6
def _get_snowflake_connection(
    schema: str = "PUBLIC",
) -> snowflake.connector.SnowflakeConnection:
    """
    Open a connection to the Snowflake data warehouse.

    Credentials and targets are read from the environment variables
    SNOWFLAKE_USER, SNOWFLAKE_PASSWORD, SNOWFLAKE_ACCOUNT,
    SNOWFLAKE_WAREHOUSE and SNOWFLAKE_DATABASE; all five must be set.

    schema: Schema to connect to. Defaults to the main PUBLIC schema.

    Raises an Exception naming the first required variable that is not set.
    """
    required_vars = (
        "SNOWFLAKE_USER",
        "SNOWFLAKE_PASSWORD",
        "SNOWFLAKE_ACCOUNT",
        "SNOWFLAKE_WAREHOUSE",
        "SNOWFLAKE_DATABASE",
    )
    # Fail early, before attempting to connect, on the first missing variable.
    first_missing = next(
        (name for name in required_vars if name not in os.environ), None
    )
    if first_missing is not None:
        raise Exception(f"Missing required environment variable: {first_missing}")

    settings = os.environ
    return snowflake.connector.connect(
        user=settings["SNOWFLAKE_USER"],
        password=settings["SNOWFLAKE_PASSWORD"],
        account=settings["SNOWFLAKE_ACCOUNT"],
        warehouse=settings["SNOWFLAKE_WAREHOUSE"],
        database=settings["SNOWFLAKE_DATABASE"],
        schema=schema,
    )
@@ -0,0 +1,120 @@
1
+ """
2
+ This module contains functionality to translate between datekeys and dates.
3
+
4
+ Technically there are tables containing this data, but I don't think
5
+ it's necessary to actually query them. We can just calculate them directly.
6
+ Should be much quicker.
7
+ """
8
+
9
+ from datetime import datetime, timedelta
10
+
11
+ import pandas
12
+
13
+ # from connection_handling import _get_snowflake_connection
14
+
15
+
16
def datekey_to_date(datekey: int) -> datetime:
    """
    Convert a datekey to a datetime object.

    Datekey 1 corresponds to 1900-01-01, 2 to 1900-01-02, and so on.

    datekey: Integer day key. Integer-like values (anything implementing
        __index__, e.g. numpy.int64 values read out of a DataFrame) are
        accepted as well.

    Returns the datetime (at midnight) for that key.
    """
    base_date = datetime(1900, 1, 1)
    # timedelta() only accepts built-in int/float components, so normalise
    # integer-likes first. Values without __index__ (e.g. floats) are passed
    # through unchanged, exactly as before.
    if hasattr(datekey, "__index__"):
        datekey = int(datekey)
    return base_date + timedelta(days=datekey - 1)
22
+
23
+
24
def date_to_datekey(date: datetime) -> int:
    """
    Convert a datetime object to a datekey.

    The key is the number of whole days elapsed since 1900-01-01, plus one,
    so 1900-01-01 maps to 1.
    """
    elapsed = date - datetime(1900, 1, 1)
    return elapsed.days + 1
30
+
31
+
32
def get_datekey_range(start_date: datetime, end_date: datetime) -> list[int]:
    """
    Get a list of datekeys between two dates.

    The days are generated with pandas.date_range(start_date, end_date) and
    each one is converted with date_to_datekey.
    """
    days = pandas.date_range(start_date, end_date)
    return list(map(date_to_datekey, days))
37
+
38
+
39
def get_date_range(start_datekey: int, end_datekey: int) -> list[datetime]:
    """
    Get a list of dates between two datekeys.

    Both start_datekey and end_datekey are included in the result.
    """
    datekeys = range(start_datekey, end_datekey + 1)
    return list(map(datekey_to_date, datekeys))
44
+
45
+
46
def monthkey_to_date(monthkey: int) -> datetime:
    """
    Convert a monthkey to a datetime object representing the first day of the month.

    Monthkey 1 is January 1900; keys count up one month at a time from there.
    """
    # Zero-based month offset from January 1900, split into years and months.
    years_elapsed, month_index = divmod(monthkey - 1, 12)
    return datetime(1900 + years_elapsed, month_index + 1, 1)
53
+
54
+
55
def date_to_monthkey(date: datetime) -> int:
    """
    Convert a datetime object to a monthkey.

    January 1900 maps to 1; each later month adds one.
    """
    years_since_base = date.year - 1900
    return years_since_base * 12 + date.month
60
+
61
+
62
def quarterkey_to_date(quarterkey: int) -> datetime:
    """
    Convert a quarterkey to a datetime object representing the first day of the quarter.

    Quarterkey 1 is Q1 1900; keys count up one quarter at a time from there.
    """
    # Zero-based quarter offset from Q1 1900, split into years and quarters.
    years_elapsed, quarter_index = divmod(quarterkey - 1, 4)
    first_month = quarter_index * 3 + 1
    return datetime(1900 + years_elapsed, first_month, 1)
70
+
71
+
72
def date_to_quarterkey(date: datetime) -> int:
    """
    Convert a datetime object to a quarterkey.

    Q1 1900 maps to 1; each later quarter adds one.
    """
    years_since_base = date.year - 1900
    quarter_of_year = (date.month - 1) // 3 + 1
    return years_since_base * 4 + quarter_of_year
78
+
79
+
80
def yearkey_to_date(yearkey: int) -> datetime:
    """
    Convert a yearkey to a datetime object representing the first day of the year.

    Yearkey 1 is the year 1900.
    """
    # Keys are one-based, so the year is the key plus 1899.
    return datetime(1899 + yearkey, 1, 1)
86
+
87
+
88
def date_to_yearkey(date: datetime) -> int:
    """
    Convert a datetime object to a yearkey.

    The year 1900 maps to 1; each later year adds one.
    """
    # Keys are one-based, so subtract 1899 rather than 1900.
    return date.year - 1899
93
+
94
+
95
+ # Commented out in-progress code
96
+ # def date_dim(
97
+ # datekey: int | None = None, date: datetime | None = None
98
+ # ) -> pandas.DataFrame:
99
+ # """
100
+ # Retrieve corresponding datekey from a given date or vice versa.
101
+ # """
102
+ # if date and datekey:
103
+ # raise Exception("Only one of datekey or date must be provided")
104
+ # if not datekey and not date:
105
+ # raise Exception("Either datekey or date must be provided")
106
+
107
+ # conn = _get_snowflake_connection(schema="TIME_DIMENSION")
108
+ # try:
109
+ # if datekey:
110
+ # query = f"SELECT * FROM DATE_DIM WHERE DATEKEY = {datekey}"
111
+ # else:
112
+ # query = f"SELECT * FROM DATE_DIM WHERE DATE = '{date}'"
113
+ # cur = conn.cursor()
114
+ # cur.execute(query)
115
+ # df = cur.fetch_pandas_all()
116
+ # except Exception as e:
117
+ # raise Exception("Error reading Snowflake table", e)
118
+ # finally:
119
+ # conn.close()
120
+ # return df