vortexa-claude-skills 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,242 @@
1
+ """Cargo movement queries and OOW helpers."""
2
+
3
+ import time as time_module
4
+ from collections import defaultdict
5
+
6
+ import pandas as pd
7
+
8
+
9
+ def _to_utc(ts):
10
+ """Convert timestamp-like to UTC-aware pandas Timestamp."""
11
+ if ts is None or pd.isna(ts):
12
+ return None
13
+ try:
14
+ return pd.to_datetime(ts, utc=True)
15
+ except Exception:
16
+ try:
17
+ return pd.to_datetime(ts).tz_localize("UTC")
18
+ except Exception:
19
+ return None
20
+
21
+
22
+ def _extract_location(event, layer):
23
+ """Pull location label from event location_details for a given layer."""
24
+ if getattr(event, "location_details", None):
25
+ for ent in event.location_details:
26
+ if getattr(ent, "layer", None) == layer:
27
+ return ent.label
28
+ return None
29
+
30
+
31
+ def _get_location_timeline(events, layer):
32
+ """Build chronological timeline of location changes from voyage events."""
33
+ loc_changes = []
34
+ for ev in events or []:
35
+ ts = _to_utc(getattr(ev, "start_timestamp", None))
36
+ if ts is None:
37
+ continue
38
+ loc = _extract_location(ev, layer)
39
+ if loc:
40
+ loc_changes.append((ts, loc))
41
+ loc_changes.sort(key=lambda x: x[0])
42
+
43
+ collapsed = []
44
+ prev_label = None
45
+ for ts, label in loc_changes:
46
+ if label != prev_label:
47
+ collapsed.append((ts, label))
48
+ prev_label = label
49
+ return collapsed
50
+
51
+
52
+ def _get_location_at_time(location_timeline, target_time):
53
+ """Find the location in effect at a given time from a timeline."""
54
+ current_location = None
55
+ for ts, location in location_timeline:
56
+ if ts <= target_time:
57
+ current_location = location
58
+ else:
59
+ break
60
+ return current_location
61
+
62
+
63
+ def _build_cargo_location_lookup(voyages, layer):
64
+ """Build cargo_movement_id (16-char) -> voyage location info lookup."""
65
+ cargo_voyages = defaultdict(list)
66
+ for voy in voyages:
67
+ voyage_id = getattr(voy, "id", None)
68
+ vessel_name = getattr(voy.vessel, "name", None) if hasattr(voy, "vessel") else None
69
+ voyage_start = _to_utc(getattr(voy, "start_timestamp", None))
70
+ voyage_end = _to_utc(getattr(voy, "end_timestamp", None))
71
+ location_timeline = _get_location_timeline(voy.events, layer)
72
+ for ev in voy.events or []:
73
+ if getattr(ev, "event_type", None) == "cargo":
74
+ cargo_id = getattr(ev, "cargo_movement_id", None)
75
+ if cargo_id:
76
+ cargo_id_short = cargo_id[:16] if len(cargo_id) > 16 else cargo_id
77
+ cargo_voyages[cargo_id_short].append({
78
+ "voyage_id": voyage_id,
79
+ "vessel_name": vessel_name,
80
+ "location_timeline": location_timeline,
81
+ "voyage_start": voyage_start,
82
+ "voyage_end": voyage_end,
83
+ })
84
+ return dict(cargo_voyages)
85
+
86
+
87
+ def _get_cargo_location_for_day(cargo_id_short, day, lookup):
88
+ """Find the location for a cargo on a specific day using voyage location data."""
89
+ voyage_refs = lookup.get(cargo_id_short, [])
90
+ if not voyage_refs:
91
+ return "Unknown"
92
+
93
+ day_end = day + pd.Timedelta(days=1)
94
+
95
+ for ref in voyage_refs:
96
+ voyage_start = ref.get("voyage_start")
97
+ voyage_end = ref.get("voyage_end")
98
+ if voyage_start and voyage_start < day_end and (voyage_end is None or voyage_end > day):
99
+ location = _get_location_at_time(ref["location_timeline"], day_end)
100
+ if location:
101
+ return location
102
+
103
+ for ref in voyage_refs:
104
+ location = _get_location_at_time(ref["location_timeline"], day_end)
105
+ if location:
106
+ return location
107
+
108
+ return "Unknown"
109
+
110
+
111
def cargo_on_water_ts(
    time_min, time_max, product, unit,
    location_layer="shipping_region_v2",
    exclude_intra_country=False, verbose=True,
):
    """Build daily cargo-on-water by location using CM-Authoritative approach.

    Uses CargoMovements for authoritative OOW periods/quantities and
    VoyagesSearchEnriched for location data, joined on the first 16
    characters of cargo_movement_id.

    Args:
        time_min: Start of the query window (timestamp-like).
        time_max: End of the query window (timestamp-like).
        product: A single product id string or an iterable of product ids.
        unit: Quantity unit, passed to CargoMovements as ``cm_unit``.
        location_layer: Location layer used to label cargoes per day.
        exclude_intra_country: When true, intra-country movements are
            excluded from the CargoMovements query.
        verbose: When true, progress and timings are printed.

    Returns:
        Wide DataFrame: UTC date index, one column per location label
        ("Unknown" for cargoes whose location cannot be resolved) and a
        "Total" column. Days without any cargo on water do not appear.
        An empty DataFrame is returned when there is nothing to report.

    Raises:
        ValueError: If ``time_min`` or ``time_max`` cannot be parsed.
    """
    from vortexasdk import CargoMovements, VoyagesSearchEnriched

    # Validate the window before issuing any query: an unparseable bound
    # would otherwise only fail during aggregation, after both API calls.
    tmin = _to_utc(time_min)
    tmax = _to_utc(time_max)
    if tmin is None or pd.isna(tmin) or tmax is None or pd.isna(tmax):
        raise ValueError(
            f"time_min and time_max must be parseable timestamps, "
            f"got {time_min!r} and {time_max!r}"
        )

    total_steps = 5
    start_time = time_module.time()
    product_ids = [product] if isinstance(product, str) else list(product)

    if verbose:
        print(f"Building cargo-on-water (unit: {unit}, layer: {location_layer})")

    # Step 1: CargoMovements
    if verbose:
        print(f"Step 1/{total_steps}: Querying CargoMovements...", end=" ", flush=True)
    step_start = time_module.time()
    cm_columns = [
        "cargo_movement_id", "quantity",
        "events.cargo_oil_on_water_event.0.start_timestamp",
        "events.cargo_oil_on_water_event.0.end_timestamp",
    ]
    cm_params = {
        "filter_activity": "cargo_on_water_state",
        "filter_time_min": time_min,
        "filter_time_max": time_max,
        "filter_products": product_ids,
        "cm_unit": unit,
    }
    if exclude_intra_country:
        cm_params["intra_movements"] = "exclude_intra_country"

    raw_cm = CargoMovements().search(**cm_params).to_df(columns=cm_columns)
    if verbose:
        print(f"done ({time_module.time() - step_start:.1f}s) - {len(raw_cm):,} cargoes")

    if raw_cm.empty:
        if verbose:
            print("No cargo movements found.")
        return pd.DataFrame()

    cm_df = raw_cm.copy()
    cm_df["quantity"] = pd.to_numeric(cm_df["quantity"], errors="coerce").fillna(0)
    cm_df["oow_start"] = pd.to_datetime(
        cm_df["events.cargo_oil_on_water_event.0.start_timestamp"], utc=True
    )
    cm_df["oow_end"] = pd.to_datetime(
        cm_df["events.cargo_oil_on_water_event.0.end_timestamp"], utc=True
    )
    cm_df["cargo_id_short"] = cm_df["cargo_movement_id"].str[:16]

    # Step 2: VoyagesSearchEnriched
    if verbose:
        print(f"Step 2/{total_steps}: Querying VoyagesSearchEnriched...", end=" ", flush=True)
    step_start = time_module.time()
    voyages = VoyagesSearchEnriched().search(
        time_min=time_min, time_max=time_max,
        latest_products=product_ids,
        voyage_status="laden",
        voyage_date_range_activity="active",
    ).to_list()
    if verbose:
        print(f"done ({time_module.time() - step_start:.1f}s) - {len(voyages):,} voyages")

    # Step 3: Build location lookup
    if verbose:
        print(f"Step 3/{total_steps}: Building location lookup...", end=" ", flush=True)
    step_start = time_module.time()
    cargo_location_lookup = _build_cargo_location_lookup(voyages, location_layer)
    if verbose:
        print(f"done ({time_module.time() - step_start:.1f}s) - {len(cargo_location_lookup):,} unique cargoes")

    # Step 4: Aggregate by day
    if verbose:
        print(f"Step 4/{total_steps}: Aggregating by day...", end=" ", flush=True)
    step_start = time_module.time()
    dates = pd.date_range(start=tmin.normalize(), end=tmax.normalize(), freq="D", tz="UTC")
    day_ends = dates + pd.Timedelta(days=1)
    daily_by_location = {d: defaultdict(float) for d in dates}

    cargo_rows = zip(
        cm_df["oow_start"], cm_df["oow_end"], cm_df["quantity"], cm_df["cargo_id_short"]
    )
    for oow_start, oow_end, quantity, cargo_id_short in cargo_rows:
        if pd.isna(oow_start):
            continue
        # A cargo counts for a day when its on-water period overlaps
        # [day, day + 1d); an open-ended period never stops counting.
        on_water = day_ends > oow_start
        if not pd.isna(oow_end):
            on_water = on_water & (dates < oow_end)
        # Only the overlapping days are visited, not the whole window.
        for day in dates[on_water]:
            location = _get_cargo_location_for_day(cargo_id_short, day, cargo_location_lookup)
            daily_by_location[day][location] += quantity
    if verbose:
        print(f"done ({time_module.time() - step_start:.1f}s) - {len(dates):,} days")

    # Step 5: Build DataFrame
    if verbose:
        print(f"Step 5/{total_steps}: Building DataFrame...", end=" ", flush=True)
    step_start = time_module.time()
    rows = [
        {"date": day, "location": location, "value": qty}
        for day, locations in daily_by_location.items()
        for location, qty in locations.items()
    ]

    if not rows:
        if verbose:
            print("done (no data)")
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    wide = df.pivot(index="date", columns="location", values="value").fillna(0)
    wide["Total"] = wide.sum(axis=1)
    wide.index = pd.to_datetime(wide.index, utc=True)
    wide.index.name = "date"
    wide.columns.name = None

    total_elapsed = time_module.time() - start_time
    if verbose:
        print(f"done ({time_module.time() - step_start:.1f}s)")
        print(f"Complete! Total time: {int(total_elapsed // 60)}m {int(total_elapsed % 60)}s")

    return wide.sort_index()
@@ -0,0 +1,6 @@
1
+ vortexasdk>=0.30.0
2
+ pandas>=2.0
3
+ plotly>=5.18
4
+ python-dotenv>=1.0
5
+ numpy>=1.24
6
+ python-dateutil>=2.8
@@ -0,0 +1,200 @@
1
+ """Seasonal analysis -- multi-year flows with min/max/avg bands."""
2
+
3
+ import pandas as pd
4
+ import dateutil.relativedelta
5
+ from datetime import datetime
6
+
7
+
8
def get_search_blocks(start_y, start_m, start_d, today):
    """Split the span from the start date to ``today`` into 4-year blocks.

    Returns a list of ``(block_start, block_end)`` datetime pairs. Every
    block ends one second before the next one starts, and the final block
    is cut off at ``today``.
    """
    four_years = dateutil.relativedelta.relativedelta(years=4)
    one_second = dateutil.relativedelta.relativedelta(seconds=1)

    window_start = datetime(start_y, start_m, start_d)
    window_end = window_start + four_years - one_second

    blocks = []
    while True:
        # Never let a block run past ``today``.
        blocks.append((window_start, today if window_end > today else window_end))
        if not (window_end < today):
            break
        window_start += four_years
        window_end += four_years
    return blocks
28
+
29
+
30
def moving_average(data, period, col=None):
    """Return ``data`` with a trailing rolling mean applied.

    With ``col`` set, only that column is averaged; it is placed last and
    named "value", after all other columns in their original order.
    Without ``col``, every column except the first is averaged and the
    original column names are kept. Windows shorter than ``period`` at the
    start are averaged over the rows available.
    """
    if col is not None:
        temp_name = f"{period}-day moving_avg"
        smoothed = (
            data[col]
            .rolling(window=period, min_periods=1)
            .mean()
            .to_frame(name=temp_name)
        )
        untouched = [name for name in data.columns if name != col]
        combined = pd.concat([data[untouched], smoothed], axis=1)
        return combined.rename(columns={temp_name: "value"})

    leading = data.iloc[:, :1]
    smoothed = data.iloc[:, 1:].rolling(window=period, min_periods=1).mean()
    combined = pd.concat([leading, smoothed], axis=1)
    combined.columns = list(data.columns)
    return combined
45
+
46
+
47
def seasonal_charts(data, freq, start_y):
    """Convert flows data to seasonal min/max/avg/current/last-year format.

    Args:
        data: DataFrame with "dd_mmm", "year" and "value" columns, plus a
            "month" column when ``freq`` is "month".
        freq: "day" (one output row per calendar day, 29 February
            excluded) or "month" (one output row per month).
        start_y: First year included in the historical statistics; the
            statistics run up to and including last calendar year.

    Returns:
        DataFrame with the columns "Date", "Min.", "Range <start>-<end>"
        (max minus min), "Average <start>-<end>", "Last year" and
        "Current year". Statistics and last-year values are NaN where no
        data exists for a label; missing current-year values are "".

    Raises:
        ValueError: If ``freq`` is neither "day" nor "month".
    """
    if freq == "day":
        key_col, label_format, label_freq = "dd_mmm", "%d-%b", "D"
    elif freq == "month":
        # Month-start stamps give the same twelve "%b" labels as month-end
        # ones, and "MS" is accepted by every pandas 2.x release, whereas
        # the "ME" alias only exists from pandas 2.2 onwards.
        key_col, label_format, label_freq = "month", "%b", "MS"
    else:
        raise ValueError(f"Unsupported freq {freq!r}; expected 'day' or 'month'.")

    # 29 February is dropped so every year contributes the same labels.
    df = data[data["dd_mmm"] != "29-Feb"].reset_index(drop=True)

    this_year = datetime.today().year
    last_year = this_year - 1
    stats_start_y = start_y
    stats_end_y = last_year

    # 2023 only serves as a non-leap template year to enumerate the labels.
    labels = pd.date_range(
        start=datetime(2023, 1, 1), end=datetime(2023, 12, 31), freq=label_freq
    ).strftime(label_format)

    stats_df = df[(df["year"] >= stats_start_y) & (df["year"] <= stats_end_y)]
    by_label = stats_df.groupby(key_col)["value"]
    # Reindexing leaves NaN for labels without history instead of failing.
    lows = by_label.min().reindex(labels)
    highs = by_label.max().reindex(labels)
    means = by_label.mean().reindex(labels)

    def _first_value_per_label(frame):
        # One value per label, keeping the first row when a label repeats.
        return frame.drop_duplicates(subset=key_col).set_index(key_col)["value"]

    # Look last year's values up by label rather than by row position, so
    # a gap in last year's data cannot shift later values onto wrong dates.
    last_year_vals = _first_value_per_label(
        stats_df[stats_df["year"] == last_year]
    ).reindex(labels)

    current = _first_value_per_label(df[df["year"] == this_year])
    # Blank strings mark labels not yet reached in the current year.
    current_vals = [current.get(label, "") for label in labels]

    range_col = f"Range {stats_start_y}-{stats_end_y}"
    average_col = f"Average {stats_start_y}-{stats_end_y}"
    seasonal_df = pd.DataFrame({
        "Date": list(labels),
        "Min.": lows.to_numpy(),
        range_col: (highs - lows).to_numpy(),
        average_col: means.to_numpy(),
        "Last year": last_year_vals.to_numpy(),
        "Current year": current_vals,
    })
    return seasonal_df
136
+
137
+
138
def complete_seasonal_flows(
    start_y, start_m, start_d,
    product_ids, product_ids_excl,
    freq, unit, activity,
    origin_ids, origin_ids_excl,
    destination_ids, destination_ids_excl,
    storage_location_ids=None,
    ma_period=None, exclude_intra=True,
):
    """End-to-end: query flows -> optional MA -> seasonal format.

    Args:
        start_y, start_m, start_d: Year, month and day of the first date
            to query; data is fetched from then until today.
        product_ids, product_ids_excl: Product filters to include/exclude.
        freq: Time-series frequency, "day" or "month".
        unit: Time-series unit passed to CargoTimeSeries.
        activity: Activity filter passed to CargoTimeSeries.
        origin_ids, origin_ids_excl: Origin filters to include/exclude.
        destination_ids, destination_ids_excl: Destination filters to
            include/exclude.
        storage_location_ids: Optional storage location filter.
        ma_period: Optional rolling-mean window applied to the values
            before the seasonal statistics are computed.
        exclude_intra: When true, intra-country movements are excluded.

    Returns:
        The seasonal DataFrame produced by ``seasonal_charts``.

    Raises:
        ValueError: If ``freq`` is neither "day" nor "month".
    """
    from vortexasdk import CargoTimeSeries

    # The seasonal layout exists only for these two frequencies, so reject
    # anything else before spending time on API queries.
    if freq not in ("day", "month"):
        raise ValueError(f"Unsupported freq {freq!r}; expected 'day' or 'month'.")

    today = datetime.today()
    search_blocks = get_search_blocks(start_y, start_m, start_d, today)

    intra_movements = "exclude_intra_country" if exclude_intra else "all"

    # Collect one frame per block and concatenate once at the end, rather
    # than repeatedly growing a DataFrame that starts out empty.
    frames = []
    for time_min, time_max in search_blocks:
        result = CargoTimeSeries().search(
            filter_time_min=time_min,
            filter_time_max=time_max,
            filter_origins=origin_ids,
            exclude_origins=origin_ids_excl,
            filter_destinations=destination_ids,
            exclude_destinations=destination_ids_excl,
            filter_storage_locations=storage_location_ids,
            filter_products=product_ids,
            exclude_products=product_ids_excl,
            intra_movements=intra_movements,
            filter_activity=activity,
            timeseries_frequency=freq,
            timeseries_unit=unit,
        ).to_df(columns="all")
        frames.append(result[["key", "value"]].rename(columns={"key": "date"}))

    result_dfs = pd.concat(frames)
    result_dfs["date"] = pd.to_datetime(result_dfs["date"])
    result_dfs["string_date"] = result_dfs["date"].dt.strftime("%d-%m-%Y")
    result_dfs["dd_mmm"] = result_dfs["date"].dt.strftime("%d-%b")
    result_dfs["month"] = result_dfs["date"].dt.strftime("%b")
    result_dfs["week_end_timestamp"] = result_dfs["date"] + pd.offsets.Week(weekday=6)
    result_dfs["week_number"] = result_dfs["date"].dt.isocalendar().week
    result_dfs["year"] = result_dfs["date"].dt.year
    result_dfs = result_dfs.fillna(0)
    result_dfs = result_dfs[
        ["date", "week_end_timestamp", "string_date", "dd_mmm", "week_number", "month", "year", "value"]
    ]
    # Blocks are stacked with their own indexes; renumber the rows.
    result_dfs = result_dfs.reset_index(drop=True)

    if ma_period is not None:
        data = moving_average(data=result_dfs, period=ma_period, col="value")
    else:
        data = result_dfs

    return seasonal_charts(data=data, freq=freq, start_y=start_y)