vortexa-claude-skills 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,271 @@
1
+ """CargoTimeSeries queries and breakdown parsing."""
2
+
3
+ import re
4
+ import pandas as pd
5
+
6
+ from lib.utils import _freq_norm
7
+
8
+
9
def cargo_timeseries_split(
    time_min, time_max, activity, split_property,
    products=None, origins=None, destinations=None,
    storage_locations=None, vessels=None,
    unit="b", frequency="day",
    intra_movements="exclude_intra_country",
):
    """Run a CargoTimeSeries search split by ``split_property``.

    The mandatory arguments are always sent to the SDK; each optional filter
    (products, origins, destinations, storage locations, vessels) is only
    forwarded when it is truthy.

    Returns:
        The wide DataFrame built by ``_parse_split_result`` (one row per
        date, one column per series plus ``Total``), or an empty DataFrame
        when the search yields no items.
    """
    from vortexasdk import CargoTimeSeries

    search_args = dict(
        filter_activity=activity,
        filter_time_min=time_min,
        filter_time_max=time_max,
        timeseries_frequency=frequency,
        timeseries_property=split_property,
        timeseries_unit=unit,
        intra_movements=intra_movements,
    )

    # Optional filters are omitted entirely rather than sent as None/empty.
    optional_filters = (
        ("filter_products", products),
        ("filter_origins", origins),
        ("filter_destinations", destinations),
        ("filter_storage_locations", storage_locations),
        ("filter_vessels", vessels),
    )
    for arg_name, selection in optional_filters:
        if selection:
            search_args[arg_name] = selection

    items = CargoTimeSeries().search(**search_args).to_list()
    if items:
        return _parse_split_result(items)
    return pd.DataFrame()
44
+
45
+
46
def _parse_split_result(result):
    """Turn a CargoTimeSeries ``.to_list()`` result into a wide DataFrame.

    Each item is read through ``getattr``: ``key`` (parsed as the date),
    ``value`` (the period total) and ``breakdown`` (an iterable of entries
    with ``label``/``name``/``id`` and ``value``).

    Returns:
        DataFrame indexed by date with one column per series label, an
        ``Other`` column where needed, and a ``Total`` column holding the row
        sum. Missing cells are 0.0.
    """
    records = []
    for entry in result:
        stamp = pd.to_datetime(getattr(entry, "key"))
        overall = float(getattr(entry, "value", 0.0) or 0.0)
        pieces = getattr(entry, "breakdown", None) or []

        if not pieces:
            # No split available: the whole period total is booked as "Other".
            records.append((stamp, "Other", overall))
            continue

        accounted = 0.0
        for piece in pieces:
            # First truthy identifier wins: label, then name, then id.
            series_name = (
                getattr(piece, "label", None)
                or getattr(piece, "name", None)
                or getattr(piece, "id", None)
                or "Unknown"
            )
            amount = float(getattr(piece, "value", 0.0) or 0.0)
            records.append((stamp, series_name, amount))
            accounted += amount

        # Whatever the breakdown does not explain is booked as "Other";
        # a negative gap is ignored.
        leftover = max(overall - accounted, 0.0)
        if leftover > 0:
            records.append((stamp, "Other", leftover))

    tidy = pd.DataFrame(records, columns=["date", "series", "value"])
    tidy["value"] = pd.to_numeric(tidy["value"], errors="coerce").fillna(0.0)

    per_cell = tidy.groupby(["date", "series"], as_index=False)["value"].sum()
    table = per_cell.pivot(index="date", columns="series", values="value")
    table = table.fillna(0.0).sort_index()

    table["Total"] = table.sum(axis=1)
    table.columns.name = None
    return table
87
+
88
+
89
def _parse_timeseries_breakdown(df_all, show_top_x=None):
    """Flatten ``breakdown.N.label`` / ``breakdown.N.value`` columns into a wide table.

    Args:
        df_all: DataFrame with a ``key`` column (parsed as UTC datetimes) and,
            optionally, ``value`` plus numbered ``breakdown.N.*`` columns.
            The frame is only read, never modified.
        show_top_x: When given and there are more labels than this, keep the
            ``show_top_x`` largest labels (by summed value) and fold the rest
            into a single ``Other`` column. A label that is itself called
            ``Other`` is always folded into that bucket, never overwritten.
            Negative values are treated as 0.

    Returns:
        * No breakdown label columns: a ``Date``/``Total`` frame with ``value``
          summed per day (all zeros if ``value`` is absent).
        * Otherwise: a frame with a ``Date`` column and one column per label,
          values summed per day, missing cells 0.0.
        * A frame with only an empty ``Date`` column when no usable
          label/value pairs exist.

    Raises:
        ValueError: if ``key`` is missing or none of its values parse as dates.
    """
    df = df_all
    if "key" not in df.columns:
        raise ValueError("Unexpected TimeSeries frame: missing 'key'.")

    date = pd.to_datetime(df["key"], utc=True, errors="coerce")
    if date.isna().all():
        raise ValueError("TimeSeries: 'key' could not be parsed to datetime.")

    pat_label = re.compile(r"^breakdown\.(\d+)\.label$")
    label_slots = []
    for col in df.columns:
        # Non-string column names cannot be breakdown fields; skip them
        # instead of letting re.match raise TypeError.
        hit = pat_label.match(col) if isinstance(col, str) else None
        if hit:
            label_slots.append((int(hit.group(1)), hit.group(1)))
    label_slots.sort()

    if not label_slots:
        if "value" in df.columns:
            totals = pd.to_numeric(df["value"], errors="coerce").fillna(0.0)
        else:
            # Without a 'value' column there is nothing to total; report zeros
            # rather than failing on a scalar default.
            totals = pd.Series(0.0, index=df.index)
        out = pd.DataFrame({"Date": date.dt.floor("D"), "Total": totals})
        return out.groupby("Date", as_index=False)["Total"].sum().sort_values("Date")

    parts = []
    for _, digits in label_slots:
        lab_col = f"breakdown.{digits}.label"
        val_col = f"breakdown.{digits}.value"
        if val_col not in df.columns:
            continue
        parts.append(pd.DataFrame({
            "Date": date,
            "label": df[lab_col].fillna(""),
            "value": pd.to_numeric(df[val_col], errors="coerce").fillna(0.0),
        }))

    if not parts:
        return pd.DataFrame({"Date": pd.to_datetime([])})

    tidy = pd.concat(parts, ignore_index=True)
    # Rows without a label carry no category and are dropped.
    tidy = tidy[tidy["label"] != ""].copy()
    if tidy.empty:
        return pd.DataFrame({"Date": pd.to_datetime([])})

    tidy["Date"] = pd.to_datetime(tidy["Date"]).dt.floor("D")
    agg = tidy.groupby(["Date", "label"], as_index=False)["value"].sum()
    if agg.empty:
        return pd.DataFrame({"Date": pd.to_datetime([])})

    wide = agg.pivot(index="Date", columns="label", values="value").fillna(0.0)
    wide.index.name = "Date"
    wide = wide.sort_index()

    if show_top_x is not None and wide.shape[1] > show_top_x:
        limit = max(int(show_top_x), 0)
        ranked = wide.sum(axis=0).sort_values(ascending=False).index
        # A real label named "Other" must not compete for a top slot: it
        # would be overwritten by the synthetic bucket and selected twice.
        keep = [col for col in ranked if col != "Other"][:limit]
        rest = [col for col in wide.columns if col not in keep]
        if rest:
            other_total = wide[rest].sum(axis=1)
            wide = wide[keep].copy()
            wide["Other"] = other_total

    out = wide.reset_index()
    out = out.rename_axis(columns=None)
    return out
150
+
151
+
152
def flows_time_series_split(
    *,
    time_min,
    time_max,
    activity,
    split_property="shipping_region_v2",
    frequency="week",
    top_n=8,
    unit="b",
    products=None,
    origins=None,
    destinations=None,
    exclude_origins=None,
    exclude_destinations=None,
    exclude_products=None,
    storage_locations=None,
    intra_movements="exclude_intra_country",
):
    """Query CargoTimeSeries split by a property and build Top-N + Other tables.

    The search is always issued at daily frequency; ``frequency`` (normalised
    through ``_freq_norm``) only controls how the daily pivot is resampled for
    the second return value.

    Returns:
        ``(full_piv, plot_piv)``: ``full_piv`` is the daily date x series
        pivot with every series; ``plot_piv`` is the resampled pivot reduced
        to the ``top_n`` largest series plus an ``Other`` column. Two empty
        DataFrames are returned when the search yields nothing.
    """
    from vortexasdk import CargoTimeSeries

    freq_norm = _freq_norm(frequency)

    # The generic "shipping_region_v2" split is translated to the property
    # that matches the requested activity; any other value is passed through.
    split_api = split_property
    if split_property == "shipping_region_v2":
        activity_properties = (
            (("loading_state", "loading_end"), "origin_shipping_region_v2"),
            (("unloading_state", "unloading_start"), "destination_shipping_region_v2"),
            (("storing_state",), "storage_location_shipping_region_v2"),
        )
        for activities, regional_property in activity_properties:
            if activity in activities:
                split_api = regional_property
                break

    query = dict(
        filter_time_min=time_min,
        filter_time_max=time_max,
        filter_activity=activity,
        timeseries_frequency="day",
        timeseries_property=split_api,
        timeseries_unit=unit,
        intra_movements=intra_movements,
    )
    # Optional filters are only sent when truthy.
    optional_args = (
        ("filter_products", products),
        ("filter_origins", origins),
        ("filter_destinations", destinations),
        ("exclude_origins", exclude_origins),
        ("exclude_destinations", exclude_destinations),
        ("exclude_products", exclude_products),
        ("filter_storage_locations", storage_locations),
    )
    for arg_name, selection in optional_args:
        if selection:
            query[arg_name] = selection

    items = CargoTimeSeries().search(**query).to_list()
    if not items:
        return pd.DataFrame(), pd.DataFrame()

    records = []
    for entry in items:
        stamp = pd.to_datetime(getattr(entry, "key"))
        overall = float(getattr(entry, "value", 0.0) or 0.0)
        pieces = getattr(entry, "breakdown", None) or []
        if not pieces:
            # No split for this period: book the whole total as "Other".
            records.append((stamp, "Other", overall))
            continue

        accounted = 0.0
        for piece in pieces:
            series_name = (
                getattr(piece, "label", None)
                or getattr(piece, "name", None)
                or getattr(piece, "id", None)
                or "Unknown"
            )
            amount = float(getattr(piece, "value", 0.0) or 0.0)
            records.append((stamp, series_name, amount))
            accounted += amount

        # Volume not explained by the breakdown goes to "Other".
        leftover = max(overall - accounted, 0.0)
        if leftover > 0:
            records.append((stamp, "Other", leftover))

    df_long = pd.DataFrame(records, columns=["date", "series", "value"])
    df_long["value"] = pd.to_numeric(df_long["value"], errors="coerce").fillna(0.0)

    per_cell = df_long.groupby(["date", "series"], as_index=False)["value"].sum()
    full_piv = per_cell.pivot(index="date", columns="series", values="value")
    full_piv = full_piv.fillna(0.0).sort_index()

    # Daily data is kept as-is; coarser frequencies are summed per bucket.
    rule = {"week": "W-SUN", "month": "MS", "quarter": "QS", "year": "YS"}.get(freq_norm)
    if rule is None:
        resampled = full_piv.copy()
    else:
        resampled = full_piv.resample(rule).sum()

    # Rank the named series (never "Other") by total volume over the window.
    named_rows = df_long[df_long["series"] != "Other"]
    ranking = named_rows.groupby("series")["value"].sum().sort_values(ascending=False)
    keep = ranking.head(top_n).index.tolist()

    plot_piv = resampled.copy()
    remainder = [col for col in plot_piv.columns if col not in keep]
    if keep:
        plot_piv = plot_piv.reindex(columns=keep + remainder)
    if remainder:
        plot_piv["Other"] = plot_piv[remainder].sum(axis=1)

    final_cols = list(keep)
    if "Other" in plot_piv.columns:
        final_cols.append("Other")
    plot_piv = plot_piv[final_cols]

    return full_piv, plot_piv.fillna(0.0)
package/lib/utils.py ADDED
@@ -0,0 +1,120 @@
1
+ """Date helpers, DataFrame formatters, and shared utilities."""
2
+
3
+ import pandas as pd
4
+ from datetime import datetime
5
+
6
+
7
def _to_dt(x):
    """Parse ``x`` to datetime(s) and drop any timezone information.

    Unparseable values become ``NaT`` (``errors="coerce"``). Timezone-aware
    values keep their wall-clock time; only the tz marker is removed.

    Args:
        x: Anything ``pd.to_datetime`` accepts (scalar, list/array, Series).

    Returns:
        A tz-naive Series, DatetimeIndex, Timestamp or ``NaT``, following
        what ``pd.to_datetime`` returns for the given input.
    """
    parsed = pd.to_datetime(x, errors="coerce")

    if isinstance(parsed, pd.Series):
        return parsed.dt.tz_localize(None)
    if isinstance(parsed, pd.Index):
        # An Index has no ``.dt`` accessor; tz_localize is called directly.
        return parsed.tz_localize(None)
    if isinstance(parsed, (pd.Timestamp, datetime)):
        try:
            return parsed.tz_localize(None)
        except (AttributeError, TypeError):
            # Best effort: a datetime-like without tz_localize is returned as parsed.
            return parsed
    return parsed
21
+
22
+
23
def _cols(df, names):
    """Filter ``names`` down to those present in ``df.columns``, keeping their order."""
    present = []
    for candidate in names:
        if candidate in df.columns:
            present.append(candidate)
    return present
26
+
27
+
28
def _freq_norm(x):
    """Normalize a user-friendly frequency string to its canonical value.

    Matching ignores case and surrounding whitespace, so ``" Weekly "`` and
    ``"w"`` both give ``"week"``.

    Args:
        x: Frequency alias such as ``"d"``, ``"weekly"``, ``"mon"``, ``"Q"``
            or ``"annual"``.

    Returns:
        One of ``"day"``, ``"week"``, ``"month"``, ``"quarter"``, ``"year"``.

    Raises:
        ValueError: if ``x`` is empty, not a string, or not a known alias.
    """
    aliases = {
        "d": "day", "day": "day", "daily": "day",
        "w": "week", "week": "week", "weekly": "week",
        "m": "month", "mon": "month", "month": "month", "monthly": "month",
        "q": "quarter", "quarter": "quarter", "quarterly": "quarter",
        "y": "year", "yr": "year", "year": "year", "annual": "year", "annually": "year",
    }
    # Non-strings (None, numbers, ...) are treated as "no frequency" so the
    # caller always gets the same ValueError rather than an AttributeError.
    key = x.strip().lower() if isinstance(x, str) else ""
    out = aliases.get(key, key)
    if out not in {"day", "week", "month", "quarter", "year"}:
        raise ValueError("frequency must map to day/week/month/quarter/year")
    return out
42
+
43
+
44
# Source column name -> display name, applied by rename_cm_columns to
# CargoMovements frames. Only index 0 of each load/unload event list and of
# the vessels list is mapped.
_CM_COLUMN_MAP = {
    # Cargo identity and volumes
    "cargo_movement_id": "Cargo ID",
    "quantity": "Volume",
    "discharge_quantity": "Discharge Volume",
    "status": "Status",
    # Product hierarchy
    "product.group.label": "Product Group",
    "product.group_product.label": "Product Sub-Group",
    "product.category.label": "Product Category",
    "product.grade.label": "Product Grade",
    # Load event; note "Load Date" is the load event's END timestamp
    "events.cargo_port_load_event.0.location.terminal.label": "Load Terminal",
    "events.cargo_port_load_event.0.location.port.label": "Load Port",
    "events.cargo_port_load_event.0.location.country.label": "Load Country",
    "events.cargo_port_load_event.0.location.shipping_region_v2.label": "Load Shipping Region",
    "events.cargo_port_load_event.0.start_timestamp": "Load Start",
    "events.cargo_port_load_event.0.end_timestamp": "Load Date",
    # Unload event; note "Unload Date" is the unload event's START timestamp
    "events.cargo_port_unload_event.0.location.terminal.label": "Unload Terminal",
    "events.cargo_port_unload_event.0.location.port.label": "Unload Port",
    "events.cargo_port_unload_event.0.location.country.label": "Unload Country",
    "events.cargo_port_unload_event.0.location.shipping_region_v2.label": "Unload Shipping Region",
    "events.cargo_port_unload_event.0.start_timestamp": "Unload Date",
    "events.cargo_port_unload_event.0.end_timestamp": "Unload End",
    # Vessel details and associated corporate entities
    "vessels.0.name": "Vessel Name",
    "vessels.0.imo": "Vessel IMO",
    "vessels.0.vessel_class": "Vessel Class",
    "vessels.0.corporate_entities.charterer.label": "Charterer",
    "vessels.0.corporate_entities.effective_controller.label": "Effective Controller",
    "vessels.0.corporate_entities.time_charterer.label": "Time Charterer",
}
72
+
73
# Source column name -> display name, applied by rename_vse_columns to
# VoyagesSearchEnriched frames. Source names are upper-case with spaces.
_VSE_COLUMN_MAP = {
    # Voyage window
    "START DATE": "Voyage Start",
    "END DATE": "Voyage End",
    # Product fields (the "LATEST" prefix is dropped in the display name)
    "LATEST PRODUCT GROUP": "Product Group",
    "LATEST PRODUCT GRADE": "Product Grade",
    # Geography
    "ORIGIN COUNTRY": "Origin Country",
    "DESTINATION COUNTRY": "Destination Country",
    "ORIGIN PORT": "Origin Port",
    "DESTINATION PORT": "Destination Port",
}
83
+
84
+
85
def rename_cm_columns(df):
    """Return ``df`` with CargoMovements columns renamed to human-readable names.

    Only keys of ``_CM_COLUMN_MAP`` that are present in ``df.columns`` are
    renamed; all other columns are left as they are.
    """
    applicable = {}
    for source_name, display_name in _CM_COLUMN_MAP.items():
        if source_name in df.columns:
            applicable[source_name] = display_name
    return df.rename(columns=applicable)
88
+
89
+
90
def rename_vse_columns(df):
    """Return ``df`` with VoyagesSearchEnriched columns renamed to human-readable names.

    Only keys of ``_VSE_COLUMN_MAP`` that are present in ``df.columns`` are
    renamed; all other columns are left as they are.
    """
    applicable = {}
    for source_name, display_name in _VSE_COLUMN_MAP.items():
        if source_name in df.columns:
            applicable[source_name] = display_name
    return df.rename(columns=applicable)
93
+
94
+
95
def top_n_with_other(df, n=10, total_col="Total"):
    """Keep the top ``n`` columns by sum and aggregate the rest into ``Other``.

    Args:
        df: Wide DataFrame of summable columns. It is not modified.
        n: Number of columns to keep, ranked by column sum (descending).
        total_col: Name of a total column that is excluded from the ranking
            and re-attached unchanged as the last column when present.

    Returns:
        A new DataFrame with the top ``n`` columns, then ``Other`` (only if
        something was folded into it or the input already had one), then
        ``total_col`` if the input had it.

    An ``Other`` column already present in the input is never ranked: its
    values are added to the new bucket, so it cannot take a top-``n`` slot and
    then be overwritten by the leftovers.
    """
    out = df.copy()

    # Set the total aside so it neither competes in the ranking nor ends up
    # inside the bucket.
    total_series = out.pop(total_col) if total_col in out.columns else None
    # Same for an existing "Other" bucket: it is merged, not ranked.
    prior_other = out.pop("Other") if "Other" in out.columns else None

    ranked = out.sum().sort_values(ascending=False)
    top_cols = ranked.index[:n].tolist()
    rest_cols = ranked.index[n:].tolist()

    result = out[top_cols].copy()

    if rest_cols:
        bucket = out[rest_cols].sum(axis=1)
        if prior_other is not None:
            bucket = bucket.add(prior_other, fill_value=0)
        result["Other"] = bucket
    elif prior_other is not None:
        result["Other"] = prior_other

    if total_series is not None:
        result[total_col] = total_series

    return result
package/lib/vessels.py ADDED
@@ -0,0 +1,192 @@
1
+ """Vessel tracking -- IMO resolution, signals positions, fleet tracking."""
2
+
3
+ import os
4
+
5
+ import pandas as pd
6
+ import requests
7
+
8
+
9
# Endpoint queried by extract_vessel_positions for vessel position signals.
signals_url = "https://api.vortexa.com/v6/signals/vessel-positions"
10
+
11
+
12
def map_imos_to_vortexa_ids(imo_dict):
    """Resolve a ``{name: imo_number}`` dict to vessel records.

    IMOs that are ``None`` are skipped silently; IMOs that cannot be parsed
    as integers, or that are not found in the vessel list, are skipped with a
    printed warning.

    Returns:
        DataFrame with columns ``user_vessel_name``, ``imo``, ``vessel_id``,
        ``vortexa_name`` and ``vessel_class``, one row per matched entry.

    Raises:
        RuntimeError: if ``VORTEXA_API_KEY`` is not set in the environment.
        ValueError: if no IMO is valid, or none can be matched.
    """
    from vortexasdk import Vessels

    if os.environ.get("VORTEXA_API_KEY") is None:
        raise RuntimeError("VORTEXA_API_KEY not set in environment.")

    parsed_imos = {}
    for label, raw_imo in imo_dict.items():
        if raw_imo is None:
            continue
        try:
            parsed_imos[label] = int(str(raw_imo).strip())
        except ValueError:
            print(f"WARNING: Could not parse IMO '{raw_imo}' for '{label}'. Skipping.")

    if not parsed_imos:
        raise ValueError("No valid IMOs provided in imo_dict.")
    wanted = list(parsed_imos.values())

    # The search is issued without filters and narrowed locally by IMO.
    catalogue = Vessels().search().to_df(columns=["id", "name", "imo", "vessel_class"])
    catalogue["imo"] = pd.to_numeric(catalogue["imo"], errors="coerce")
    candidates = catalogue[catalogue["imo"].isin(wanted)]

    records = []
    for user_name, imo in parsed_imos.items():
        hits = candidates[candidates["imo"] == imo]
        if hits.empty:
            print(f"WARNING: IMO {imo} for '{user_name}' not found.")
            continue
        # When several vessels share an IMO, the first hit is used.
        first = hits.iloc[0]
        records.append({
            "user_vessel_name": user_name,
            "imo": imo,
            "vessel_id": first["id"],
            "vortexa_name": first["name"],
            "vessel_class": first.get("vessel_class"),
        })

    if not records:
        raise ValueError("No IMOs could be matched to Vortexa vessels.")
    return pd.DataFrame(records)
56
+
57
+
58
def extract_vessel_positions(session, vessel_ids_short, start_timestamp, end_timestamp, interval="1d", timeout=45):
    """Fetch vessel positions for one batch of vessels from the signals API.

    Args:
        session: Object with a ``requests.Session``-style ``get`` method.
        vessel_ids_short: Vessel ids sent as the ``vessel_id`` parameter.
        start_timestamp: Window start; must provide ``isoformat()``.
        end_timestamp: Window end; must provide ``isoformat()``.
        interval: Value sent as the ``interval`` parameter.
        timeout: Request timeout passed to ``session.get``.

    Returns:
        DataFrame built from the ``data`` field of the JSON response; empty
        when that field is absent.

    Raises:
        RuntimeError: if ``VORTEXA_API_KEY`` is not set, checked before any
            request is made.
        Exception: whatever ``raise_for_status()`` raises on an HTTP error.
    """
    api_key = os.environ.get("VORTEXA_API_KEY")
    if not api_key:
        # Fail fast with the same error map_imos_to_vortexa_ids uses instead
        # of sending an unauthenticated request.
        raise RuntimeError("VORTEXA_API_KEY not set in environment.")

    params = {
        "apikey": api_key,
        "time_min": start_timestamp.isoformat(),
        "time_max": end_timestamp.isoformat(),
        "interval": interval,
        "vessel_id": vessel_ids_short,
    }
    resp = session.get(signals_url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()
    return pd.DataFrame(data.get("data", []))
72
+
73
+
74
def fetch_signals_for_fleet(vessel_ids, start_timestamp, end_timestamp,
                            interval="1d", group=20, verbose=True, timeout=45):
    """Pull vessel positions for a fleet in batches of ``group`` vessels.

    Each batch is fetched with ``extract_vessel_positions`` over one shared
    ``requests.Session``; ids are truncated to their first 16 characters
    before being sent.

    Args:
        vessel_ids: Iterable of vessel id strings (list, tuple, Series, ...).
        start_timestamp: Window start, forwarded to each batch call.
        end_timestamp: Window end, forwarded to each batch call.
        interval: Sampling interval forwarded to each batch call.
        group: Number of vessels per request; must be at least 1.
        verbose: When true, print progress after every batch.
        timeout: Per-request timeout forwarded to each batch call.

    Returns:
        All non-empty batch frames concatenated with a fresh index, or an
        empty DataFrame when there are no vessels or no data.

    Raises:
        ValueError: if ``group`` is smaller than 1.
    """
    if group < 1:
        raise ValueError("group must be a positive integer")

    # Materialise once so any iterable supports len() and slicing.
    ids = list(vessel_ids)
    total = len(ids)
    if total == 0:
        return pd.DataFrame()

    # Collect frames and concatenate once; concatenating inside the loop
    # re-copies every earlier batch on each iteration.
    frames = []
    with requests.Session() as session:
        for start in range(0, total, group):
            batch_ids_short = [vid[:16] for vid in ids[start:start + group]]

            df_batch = extract_vessel_positions(
                session=session,
                vessel_ids_short=batch_ids_short,
                start_timestamp=start_timestamp,
                end_timestamp=end_timestamp,
                interval=interval,
                timeout=timeout,
            )
            if not df_batch.empty:
                frames.append(df_batch)

            if verbose:
                done = min(start + group, total)
                print(f"{done} vessels done out of {total}")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
103
+
104
+
105
def build_daily_positions_from_imos(imo_dict, time_min, time_max, interval="1d", group=20, verbose=True):
    """Build one position per vessel per day for a fleet given by IMO numbers.

    IMOs are resolved with ``map_imos_to_vortexa_ids``, positions are pulled
    with ``fetch_signals_for_fleet``, and both are joined on the first 16
    characters of the vessel id. For every vessel and UTC day the earliest
    record (by timestamp) is kept.

    Returns:
        ``(daily_long, lat_wide, lon_wide)``: a long table with one row per
        vessel/day, and two date x vessel-name pivots holding ``lat`` and
        ``lon``.

    Raises:
        ValueError: if no positions come back, or none match the given IMOs.
    """
    fleet = map_imos_to_vortexa_ids(imo_dict)

    raw_positions = fetch_signals_for_fleet(
        vessel_ids=fleet["vessel_id"].tolist(),
        start_timestamp=time_min,
        end_timestamp=time_max,
        interval=interval,
        group=group,
        verbose=verbose,
    )
    if raw_positions.empty:
        raise ValueError("Signals API returned no data for the selected vessels/time window.")

    signals = raw_positions.copy()
    signals["timestamp"] = pd.to_datetime(signals["timestamp"], utc=True)
    signals["date"] = signals["timestamp"].dt.floor("D")
    signals["vessel_id_short"] = signals["vessel_id"].str[:16]

    # Join key: the same 16-character id prefix that is sent to the API.
    lookup = fleet.rename(columns={"vessel_id": "vortexa_vessel_id"})
    lookup["vessel_id_short"] = lookup["vortexa_vessel_id"].str[:16]
    lookup_cols = ["user_vessel_name", "imo", "vortexa_vessel_id", "vessel_id_short"]

    joined = signals.merge(lookup[lookup_cols], on="vessel_id_short", how="left")
    joined = joined[joined["user_vessel_name"].notna()]
    if joined.empty:
        raise ValueError("Signals returned positions but none matched provided IMOs.")

    # Sort so that .first() below picks the earliest timestamp of each day.
    ordered = joined.sort_values(
        ["user_vessel_name", "imo", "vessel_id_short", "date", "timestamp"]
    )
    group_keys = ["user_vessel_name", "imo", "vortexa_vessel_id", "vessel_id_short", "date"]
    daily = ordered.groupby(group_keys, as_index=False).first()

    lat_wide = daily.pivot(index="date", columns="user_vessel_name", values="lat").sort_index()
    lon_wide = daily.pivot(index="date", columns="user_vessel_name", values="lon").sort_index()

    # In the long table the timestamp is replaced by the day it belongs to.
    daily["timestamp"] = daily["date"]
    long_cols = [
        "date", "timestamp", "lat", "lon", "user_vessel_name", "imo",
        "vortexa_vessel_id", "vessel_id_short",
    ]
    daily_long = daily[long_cols]

    return daily_long, lat_wide, lon_wide
161
+
162
+
163
def build_latlon_timeseries_table(lat_wide, lon_wide):
    """Merge latitude and longitude pivots into one table.

    Every column of ``lat_wide`` gets a ``_lat`` suffix and every column of
    ``lon_wide`` a ``_lon`` suffix. The columns are then sorted by name, and
    the index is moved into a column (renamed to ``Date`` when it is called
    ``date``).
    """
    suffixed = []
    for frame, suffix in ((lat_wide, "_lat"), (lon_wide, "_lon")):
        labelled = frame.copy()
        labelled.columns.name = None
        suffixed.append(labelled.add_suffix(suffix))

    combined = pd.concat(suffixed, axis=1).sort_index(axis=1)
    return combined.reset_index().rename(columns={"date": "Date"})
177
+
178
+
179
def track_vessels_daily(
    imo_dict, time_min, time_max,
    interval="1d", group=20,
):
    """Run the IMO -> positions -> daily tables pipeline and return it as a dict.

    Delegates to ``build_daily_positions_from_imos`` (always with
    ``verbose=True``) and returns its three results under the keys
    ``daily_long``, ``lat_wide`` and ``lon_wide``.
    """
    long_table, latitudes, longitudes = build_daily_positions_from_imos(
        imo_dict=imo_dict,
        time_min=time_min,
        time_max=time_max,
        interval=interval,
        group=group,
        verbose=True,
    )
    tables = {}
    tables["daily_long"] = long_table
    tables["lat_wide"] = latitudes
    tables["lon_wide"] = longitudes
    return tables