vortexa-claude-skills 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/VERSION +1 -0
- package/bin/.gitkeep +0 -0
- package/bin/setup.js +302 -0
- package/commands/vortexa/_check-setup.md +9 -0
- package/commands/vortexa/_skill-template.md +100 -0
- package/commands/vortexa/breakdown.md +294 -0
- package/commands/vortexa/cargo-flows.md +247 -0
- package/commands/vortexa/compare.md +315 -0
- package/commands/vortexa/custom.md +214 -0
- package/commands/vortexa/explain.md +124 -0
- package/commands/vortexa/init.md +133 -0
- package/commands/vortexa/oow.md +189 -0
- package/commands/vortexa/seasonal.md +185 -0
- package/commands/vortexa/voyages.md +285 -0
- package/context/.gitkeep +0 -0
- package/context/cargo-movements.md +738 -0
- package/context/date-units.md +188 -0
- package/context/endpoint-template.md +176 -0
- package/context/entity-resolution.md +217 -0
- package/context/guardrails.md +161 -0
- package/context/reference-endpoints.md +651 -0
- package/context/voyages.md +636 -0
- package/lib/__init__.py +4 -0
- package/lib/aliases.json +52 -0
- package/lib/api.py +20 -0
- package/lib/entities.py +254 -0
- package/lib/inventory.py +140 -0
- package/lib/movements.py +242 -0
- package/lib/requirements.txt +6 -0
- package/lib/seasonal.py +200 -0
- package/lib/timeseries.py +271 -0
- package/lib/utils.py +120 -0
- package/lib/vessels.py +192 -0
- package/lib/visualization.py +164 -0
- package/lib/voyages.py +236 -0
- package/package.json +28 -0
- package/templates/.env.template +3 -0
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
"""CargoTimeSeries queries and breakdown parsing."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
from lib.utils import _freq_norm
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def cargo_timeseries_split(
    time_min, time_max, activity, split_property,
    products=None, origins=None, destinations=None,
    storage_locations=None, vessels=None,
    unit="b", frequency="day",
    intra_movements="exclude_intra_country",
):
    """Run a CargoTimeSeries search split by one property and pivot it wide.

    The mandatory search arguments are always sent; each optional filter
    (products, origins, destinations, storage locations, vessels) is only
    forwarded when it is truthy.

    Returns:
        The wide DataFrame built by ``_parse_split_result``, or an empty
        DataFrame when the search returns no items.
    """
    from vortexasdk import CargoTimeSeries

    search_args = dict(
        filter_activity=activity,
        filter_time_min=time_min,
        filter_time_max=time_max,
        timeseries_frequency=frequency,
        timeseries_property=split_property,
        timeseries_unit=unit,
        intra_movements=intra_movements,
    )

    # Optional filters: skip anything the caller left empty.
    optional_filters = (
        ("filter_products", products),
        ("filter_origins", origins),
        ("filter_destinations", destinations),
        ("filter_storage_locations", storage_locations),
        ("filter_vessels", vessels),
    )
    for arg_name, arg_value in optional_filters:
        if arg_value:
            search_args[arg_name] = arg_value

    items = CargoTimeSeries().search(**search_args).to_list()
    if items:
        return _parse_split_result(items)
    return pd.DataFrame()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_split_result(result):
|
|
47
|
+
"""Parse .to_list() CTS result with breakdown into wide DataFrame."""
|
|
48
|
+
rows = []
|
|
49
|
+
for item in result:
|
|
50
|
+
d = pd.to_datetime(getattr(item, "key"))
|
|
51
|
+
total = float(getattr(item, "value", 0.0) or 0.0)
|
|
52
|
+
breakdown = getattr(item, "breakdown", None) or []
|
|
53
|
+
|
|
54
|
+
if not breakdown:
|
|
55
|
+
rows.append((d, "Other", total))
|
|
56
|
+
continue
|
|
57
|
+
|
|
58
|
+
summed = 0.0
|
|
59
|
+
for b in breakdown:
|
|
60
|
+
label = (
|
|
61
|
+
getattr(b, "label", None)
|
|
62
|
+
or getattr(b, "name", None)
|
|
63
|
+
or getattr(b, "id", None)
|
|
64
|
+
or "Unknown"
|
|
65
|
+
)
|
|
66
|
+
val = float(getattr(b, "value", 0.0) or 0.0)
|
|
67
|
+
rows.append((d, label, val))
|
|
68
|
+
summed += val
|
|
69
|
+
|
|
70
|
+
residual = max(total - summed, 0.0)
|
|
71
|
+
if residual > 0:
|
|
72
|
+
rows.append((d, "Other", residual))
|
|
73
|
+
|
|
74
|
+
df_long = pd.DataFrame(rows, columns=["date", "series", "value"])
|
|
75
|
+
df_long["value"] = pd.to_numeric(df_long["value"], errors="coerce").fillna(0.0)
|
|
76
|
+
|
|
77
|
+
wide = (
|
|
78
|
+
df_long.groupby(["date", "series"], as_index=False)["value"]
|
|
79
|
+
.sum()
|
|
80
|
+
.pivot(index="date", columns="series", values="value")
|
|
81
|
+
.fillna(0.0)
|
|
82
|
+
.sort_index()
|
|
83
|
+
)
|
|
84
|
+
wide["Total"] = wide.sum(axis=1)
|
|
85
|
+
wide.columns.name = None
|
|
86
|
+
return wide
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _parse_timeseries_breakdown(df_all, show_top_x=None):
|
|
90
|
+
"""Flatten CargoTimeSeries 'columns=all' breakdown.N.* fields into wide Date x Category table."""
|
|
91
|
+
df = df_all.copy()
|
|
92
|
+
if "key" not in df.columns:
|
|
93
|
+
raise ValueError("Unexpected TimeSeries frame: missing 'key'.")
|
|
94
|
+
|
|
95
|
+
date = pd.to_datetime(df["key"], utc=True, errors="coerce")
|
|
96
|
+
if date.isna().all():
|
|
97
|
+
raise ValueError("TimeSeries: 'key' could not be parsed to datetime.")
|
|
98
|
+
|
|
99
|
+
pat_label = re.compile(r"^breakdown\.(\d+)\.label$")
|
|
100
|
+
labels = [c for c in df.columns if pat_label.match(c)]
|
|
101
|
+
|
|
102
|
+
if not labels:
|
|
103
|
+
out = pd.DataFrame({
|
|
104
|
+
"Date": date.dt.floor("D"),
|
|
105
|
+
"Total": pd.to_numeric(df.get("value", 0), errors="coerce").fillna(0.0),
|
|
106
|
+
})
|
|
107
|
+
return out.groupby("Date", as_index=False)["Total"].sum().sort_values("Date")
|
|
108
|
+
|
|
109
|
+
idxs = sorted({int(pat_label.match(c).group(1)) for c in labels})
|
|
110
|
+
parts = []
|
|
111
|
+
for i in idxs:
|
|
112
|
+
lab_col = f"breakdown.{i}.label"
|
|
113
|
+
val_col = f"breakdown.{i}.value"
|
|
114
|
+
if lab_col in df.columns and val_col in df.columns:
|
|
115
|
+
parts.append(pd.DataFrame({
|
|
116
|
+
"Date": date,
|
|
117
|
+
"label": df[lab_col].fillna(""),
|
|
118
|
+
"value": pd.to_numeric(df[val_col], errors="coerce").fillna(0.0),
|
|
119
|
+
}))
|
|
120
|
+
|
|
121
|
+
if not parts:
|
|
122
|
+
return pd.DataFrame({"Date": pd.to_datetime([])})
|
|
123
|
+
|
|
124
|
+
long = pd.concat(parts, ignore_index=True)
|
|
125
|
+
long = long[long["label"] != ""].copy()
|
|
126
|
+
if long.empty:
|
|
127
|
+
return pd.DataFrame({"Date": pd.to_datetime([])})
|
|
128
|
+
|
|
129
|
+
long["Date"] = pd.to_datetime(long["Date"]).dt.floor("D")
|
|
130
|
+
agg = long.groupby(["Date", "label"], as_index=False)["value"].sum()
|
|
131
|
+
if agg.empty:
|
|
132
|
+
return pd.DataFrame({"Date": pd.to_datetime([])})
|
|
133
|
+
|
|
134
|
+
wide = agg.pivot(index="Date", columns="label", values="value").fillna(0.0)
|
|
135
|
+
wide.index = pd.to_datetime(wide.index)
|
|
136
|
+
wide.index.name = "Date"
|
|
137
|
+
wide = wide.sort_index()
|
|
138
|
+
|
|
139
|
+
if show_top_x is not None and wide.shape[1] > show_top_x:
|
|
140
|
+
totals = wide.sum(axis=0).sort_values(ascending=False)
|
|
141
|
+
keep = list(totals.index[:show_top_x])
|
|
142
|
+
other = list(totals.index[show_top_x:])
|
|
143
|
+
if other:
|
|
144
|
+
wide["Other"] = wide[other].sum(axis=1)
|
|
145
|
+
wide = wide[keep + (["Other"] if other else [])]
|
|
146
|
+
|
|
147
|
+
out = wide.reset_index()
|
|
148
|
+
out = out.rename_axis(columns=None)
|
|
149
|
+
return out
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def flows_time_series_split(
    *,
    time_min,
    time_max,
    activity,
    split_property="shipping_region_v2",
    frequency="week",
    top_n=8,
    unit="b",
    products=None,
    origins=None,
    destinations=None,
    exclude_origins=None,
    exclude_destinations=None,
    exclude_products=None,
    storage_locations=None,
    intra_movements="exclude_intra_country",
):
    """Split a CargoTimeSeries by one property, with Top-N + Other and resampling.

    The search is always issued at daily frequency; ``frequency`` (normalised
    through ``_freq_norm``) only controls how the daily pivot is resampled for
    the plotting table. The generic ``"shipping_region_v2"`` split is mapped to
    the origin / destination / storage-location variant for the loading,
    unloading and storing activities listed below.

    Returns:
        ``(full_piv, plot_piv)`` where ``full_piv`` is the daily date x series
        pivot (including any "Other" residual) and ``plot_piv`` is the
        resampled table restricted to the ``top_n`` largest series plus an
        "Other" column. Two empty DataFrames are returned when the search
        yields nothing.

    Raises:
        ValueError: from ``_freq_norm`` when ``frequency`` is not recognised.
    """
    from vortexasdk import CargoTimeSeries

    bucket = _freq_norm(frequency)

    # Resolve the generic region split to the activity-specific API property.
    api_property = split_property
    if split_property == "shipping_region_v2":
        region_variants = (
            (("loading_state", "loading_end"), "origin_shipping_region_v2"),
            (("unloading_state", "unloading_start"), "destination_shipping_region_v2"),
            (("storing_state",), "storage_location_shipping_region_v2"),
        )
        for activities, variant in region_variants:
            if activity in activities:
                api_property = variant
                break

    search_args = dict(
        filter_time_min=time_min,
        filter_time_max=time_max,
        filter_activity=activity,
        timeseries_frequency="day",
        timeseries_property=api_property,
        timeseries_unit=unit,
        intra_movements=intra_movements,
    )
    optional_args = (
        ("filter_products", products),
        ("filter_origins", origins),
        ("filter_destinations", destinations),
        ("exclude_origins", exclude_origins),
        ("exclude_destinations", exclude_destinations),
        ("exclude_products", exclude_products),
        ("filter_storage_locations", storage_locations),
    )
    for arg_name, arg_value in optional_args:
        if arg_value:
            search_args[arg_name] = arg_value

    items = CargoTimeSeries().search(**search_args).to_list()
    if not items:
        return pd.DataFrame(), pd.DataFrame()

    # Long-form (date, series, value) records, with the uncovered part of
    # each period total booked under "Other".
    records = []
    for entry in items:
        stamp = pd.to_datetime(entry.key)
        entry_total = float(getattr(entry, "value", 0.0) or 0.0)
        pieces = getattr(entry, "breakdown", None) or []
        if not pieces:
            records.append((stamp, "Other", entry_total))
            continue

        covered = 0.0
        for piece in pieces:
            series_name = "Unknown"
            for attr in ("label", "name", "id"):
                candidate = getattr(piece, attr, None)
                if candidate:
                    series_name = candidate
                    break
            amount = float(getattr(piece, "value", 0.0) or 0.0)
            covered += amount
            records.append((stamp, series_name, amount))

        shortfall = max(entry_total - covered, 0.0)
        if shortfall > 0:
            records.append((stamp, "Other", shortfall))

    df_long = pd.DataFrame(records, columns=["date", "series", "value"])
    df_long["value"] = pd.to_numeric(df_long["value"], errors="coerce").fillna(0.0)

    per_series = df_long.groupby(["date", "series"], as_index=False)["value"].sum()
    full_piv = per_series.pivot(index="date", columns="series", values="value")
    full_piv = full_piv.fillna(0.0).sort_index()

    # Daily data needs no resampling; coarser buckets are summed.
    resample_rule = {
        "week": "W-SUN", "month": "MS", "quarter": "QS", "year": "YS",
    }.get(bucket)
    if resample_rule is None:
        plot_base = full_piv.copy()
    else:
        plot_base = full_piv.resample(resample_rule).sum()

    # Rank the real series (never "Other") by overall volume.
    named_rows = df_long[df_long["series"] != "Other"]
    ranking = named_rows.groupby("series")["value"].sum().sort_values(ascending=False)
    keep = ranking.head(top_n).index.tolist()

    # Everything outside the top N, including a pre-existing "Other"
    # residual, is collapsed into one "Other" column placed last.
    other_cols = [col for col in plot_base.columns if col not in keep]
    plot_piv = plot_base[keep].copy()
    if other_cols:
        plot_piv["Other"] = plot_base[other_cols].sum(axis=1)

    return full_piv, plot_piv.fillna(0.0)
|
package/lib/utils.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Date helpers, DataFrame formatters, and shared utilities."""
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _to_dt(x):
|
|
8
|
+
"""Convert to datetime, strip timezone."""
|
|
9
|
+
x = pd.to_datetime(x, errors="coerce")
|
|
10
|
+
if isinstance(x, (pd.Series, pd.Index)):
|
|
11
|
+
return x.dt.tz_localize(None)
|
|
12
|
+
if isinstance(x, (pd.Timestamp, datetime)):
|
|
13
|
+
try:
|
|
14
|
+
return x.tz_localize(None)
|
|
15
|
+
except Exception:
|
|
16
|
+
try:
|
|
17
|
+
return x.tz_convert(None)
|
|
18
|
+
except Exception:
|
|
19
|
+
return x
|
|
20
|
+
return x
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _cols(df, names):
|
|
24
|
+
"""Safe column selection -- returns only columns that exist in df."""
|
|
25
|
+
return [c for c in names if c in df.columns]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _freq_norm(x):
|
|
29
|
+
"""Normalize user-friendly frequency string to API value."""
|
|
30
|
+
m = {
|
|
31
|
+
"d": "day", "day": "day", "daily": "day",
|
|
32
|
+
"w": "week", "week": "week", "weekly": "week",
|
|
33
|
+
"m": "month", "mon": "month", "month": "month", "monthly": "month",
|
|
34
|
+
"q": "quarter", "quarter": "quarter", "quarterly": "quarter",
|
|
35
|
+
"y": "year", "yr": "year", "year": "year", "annual": "year", "annually": "year",
|
|
36
|
+
}
|
|
37
|
+
x = (x or "").lower()
|
|
38
|
+
out = m.get(x, x)
|
|
39
|
+
if out not in {"day", "week", "month", "quarter", "year"}:
|
|
40
|
+
raise ValueError("frequency must map to day/week/month/quarter/year")
|
|
41
|
+
return out
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
_CM_COLUMN_MAP = {
|
|
45
|
+
"cargo_movement_id": "Cargo ID",
|
|
46
|
+
"quantity": "Volume",
|
|
47
|
+
"discharge_quantity": "Discharge Volume",
|
|
48
|
+
"status": "Status",
|
|
49
|
+
"product.group.label": "Product Group",
|
|
50
|
+
"product.group_product.label": "Product Sub-Group",
|
|
51
|
+
"product.category.label": "Product Category",
|
|
52
|
+
"product.grade.label": "Product Grade",
|
|
53
|
+
"events.cargo_port_load_event.0.location.terminal.label": "Load Terminal",
|
|
54
|
+
"events.cargo_port_load_event.0.location.port.label": "Load Port",
|
|
55
|
+
"events.cargo_port_load_event.0.location.country.label": "Load Country",
|
|
56
|
+
"events.cargo_port_load_event.0.location.shipping_region_v2.label": "Load Shipping Region",
|
|
57
|
+
"events.cargo_port_load_event.0.start_timestamp": "Load Start",
|
|
58
|
+
"events.cargo_port_load_event.0.end_timestamp": "Load Date",
|
|
59
|
+
"events.cargo_port_unload_event.0.location.terminal.label": "Unload Terminal",
|
|
60
|
+
"events.cargo_port_unload_event.0.location.port.label": "Unload Port",
|
|
61
|
+
"events.cargo_port_unload_event.0.location.country.label": "Unload Country",
|
|
62
|
+
"events.cargo_port_unload_event.0.location.shipping_region_v2.label": "Unload Shipping Region",
|
|
63
|
+
"events.cargo_port_unload_event.0.start_timestamp": "Unload Date",
|
|
64
|
+
"events.cargo_port_unload_event.0.end_timestamp": "Unload End",
|
|
65
|
+
"vessels.0.name": "Vessel Name",
|
|
66
|
+
"vessels.0.imo": "Vessel IMO",
|
|
67
|
+
"vessels.0.vessel_class": "Vessel Class",
|
|
68
|
+
"vessels.0.corporate_entities.charterer.label": "Charterer",
|
|
69
|
+
"vessels.0.corporate_entities.effective_controller.label": "Effective Controller",
|
|
70
|
+
"vessels.0.corporate_entities.time_charterer.label": "Time Charterer",
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
_VSE_COLUMN_MAP = {
|
|
74
|
+
"START DATE": "Voyage Start",
|
|
75
|
+
"END DATE": "Voyage End",
|
|
76
|
+
"LATEST PRODUCT GROUP": "Product Group",
|
|
77
|
+
"LATEST PRODUCT GRADE": "Product Grade",
|
|
78
|
+
"ORIGIN COUNTRY": "Origin Country",
|
|
79
|
+
"DESTINATION COUNTRY": "Destination Country",
|
|
80
|
+
"ORIGIN PORT": "Origin Port",
|
|
81
|
+
"DESTINATION PORT": "Destination Port",
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def rename_cm_columns(df):
    """Return *df* with known CargoMovements columns given readable headers.

    Only columns that are present in *df* and listed in ``_CM_COLUMN_MAP``
    are renamed; all other columns are left as they are.
    """
    renames = {}
    for raw_name, pretty_name in _CM_COLUMN_MAP.items():
        if raw_name in df.columns:
            renames[raw_name] = pretty_name
    return df.rename(columns=renames)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def rename_vse_columns(df):
    """Return *df* with known VoyagesSearchEnriched columns given readable headers.

    Only columns that are present in *df* and listed in ``_VSE_COLUMN_MAP``
    are renamed; all other columns are left as they are.
    """
    renames = {}
    for raw_name, pretty_name in _VSE_COLUMN_MAP.items():
        if raw_name in df.columns:
            renames[raw_name] = pretty_name
    return df.rename(columns=renames)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def top_n_with_other(df, n=10, total_col="Total"):
    """Keep the top *n* columns by sum and aggregate the rest into "Other".

    Args:
        df: Wide numeric table (one column per series). Not modified.
        n: Number of series columns to keep.
        total_col: Name of a total column to exclude from ranking; when
            present it is re-attached unchanged as the last column.

    Returns:
        A new DataFrame with the *n* largest series (descending by sum),
        then "Other" (only if there is anything to put in it), then
        ``total_col`` if the input had one.

    An "Other" column already present in *df* is never ranked: it is always
    merged into the resulting "Other" bucket. Ranking it would let the
    remainder sum overwrite its values whenever it landed in the top *n*,
    so rows would no longer add up to the total.
    """
    out = df.copy()

    # Set the total aside so it neither competes in the ranking nor ends up
    # inside "Other".
    total_series = out.pop(total_col) if total_col in out.columns else None
    # Likewise for a pre-existing remainder bucket.
    carried_other = out.pop("Other") if "Other" in out.columns else None

    # Stable sort keeps the input column order among equal sums.
    ranked = out.sum().sort_values(ascending=False, kind="stable")
    top_cols = ranked.index[:n].tolist()
    rest_cols = ranked.index[n:].tolist()

    result = out[top_cols].copy()

    if rest_cols or carried_other is not None:
        other = out[rest_cols].sum(axis=1)
        if carried_other is not None:
            other = other + carried_other
        result["Other"] = other

    if total_series is not None:
        result[total_col] = total_series

    return result
|
package/lib/vessels.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Vessel tracking -- IMO resolution, signals positions, fleet tracking."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Endpoint that extract_vessel_positions() sends its GET request to.
signals_url = "https://api.vortexa.com/v6/signals/vessel-positions"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def map_imos_to_vortexa_ids(imo_dict):
    """Resolve a ``{name: imo_number}`` dict to Vortexa vessel records.

    IMO values that are ``None`` are skipped silently; values that cannot be
    parsed as an integer, or that match no vessel, are skipped with a printed
    warning. When several vessels share an IMO, the first match is used.

    Returns:
        DataFrame with columns ``user_vessel_name``, ``imo``, ``vessel_id``,
        ``vortexa_name`` and ``vessel_class`` - one row per resolved entry.

    Raises:
        RuntimeError: if ``VORTEXA_API_KEY`` is not set in the environment.
        ValueError: if no IMO is parseable, or none can be matched.
    """
    from vortexasdk import Vessels

    if os.environ.get("VORTEXA_API_KEY") is None:
        raise RuntimeError("VORTEXA_API_KEY not set in environment.")

    # Normalise every IMO to int so it compares equal to the numeric column.
    parsed_imos = {}
    for label, raw_imo in imo_dict.items():
        if raw_imo is None:
            continue
        try:
            parsed_imos[label] = int(str(raw_imo).strip())
        except ValueError:
            print(f"WARNING: Could not parse IMO '{raw_imo}' for '{label}'. Skipping.")

    if not parsed_imos:
        raise ValueError("No valid IMOs provided in imo_dict.")

    catalogue = Vessels().search().to_df(columns=["id", "name", "imo", "vessel_class"])
    catalogue["imo"] = pd.to_numeric(catalogue["imo"], errors="coerce")
    candidates = catalogue[catalogue["imo"].isin(list(parsed_imos.values()))]

    records = []
    for label, imo in parsed_imos.items():
        hits = candidates[candidates["imo"] == imo]
        if hits.empty:
            print(f"WARNING: IMO {imo} for '{label}' not found.")
            continue
        first_hit = hits.iloc[0]
        records.append(
            dict(
                user_vessel_name=label,
                imo=imo,
                vessel_id=first_hit["id"],
                vortexa_name=first_hit["name"],
                vessel_class=first_hit.get("vessel_class"),
            )
        )

    resolved = pd.DataFrame(records)
    if resolved.empty:
        raise ValueError("No IMOs could be matched to Vortexa vessels.")
    return resolved
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def extract_vessel_positions(session, vessel_ids_short, start_timestamp, end_timestamp, interval="1d", timeout=45):
    """Fetch vessel positions for one batch of vessels with a single GET.

    Args:
        session: Object exposing ``get(url, params=..., timeout=...)``.
        vessel_ids_short: Vessel ids sent as the ``vessel_id`` parameter.
        start_timestamp, end_timestamp: Window bounds; ``.isoformat()`` is
            called on both.
        interval: Sent as the ``interval`` parameter.
        timeout: Passed through to ``session.get``.

    Returns:
        DataFrame built from the ``"data"`` entry of the JSON response
        (empty when that entry is missing).

    Raises:
        Whatever ``response.raise_for_status()`` raises on an error status.
    """
    query = dict(
        apikey=os.environ.get("VORTEXA_API_KEY"),
        time_min=start_timestamp.isoformat(),
        time_max=end_timestamp.isoformat(),
        interval=interval,
        vessel_id=vessel_ids_short,
    )
    response = session.get(signals_url, params=query, timeout=timeout)
    response.raise_for_status()

    payload = response.json()
    records = payload.get("data", [])
    return pd.DataFrame(records)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def fetch_signals_for_fleet(vessel_ids, start_timestamp, end_timestamp,
                            interval="1d", group=20, verbose=True, timeout=45):
    """Pull vessel positions for a fleet in batches of ``group`` vessels.

    The vessels are requested ``group`` at a time over one shared
    ``requests.Session``. Batching only bounds the size of each request;
    no delay is inserted between calls.

    Args:
        vessel_ids: Sequence of vessel id strings; only the first 16
            characters of each id are sent.
        start_timestamp, end_timestamp, interval, timeout: Forwarded to
            ``extract_vessel_positions``.
        group: Number of vessels per request; must be at least 1.
        verbose: Print a progress line after every batch.

    Returns:
        All non-empty batch results concatenated with a fresh integer index,
        or an empty DataFrame when no batch returned data.

    Raises:
        ValueError: if ``group`` is smaller than 1.
    """
    if group < 1:
        # range() would raise an opaque error for 0 and silently yield no
        # batches for negative values.
        raise ValueError("group must be a positive integer")

    total = len(vessel_ids)
    batches = []  # concatenated once at the end instead of inside the loop

    with requests.Session() as session:
        for start in range(0, total, group):
            batch_ids = vessel_ids[start:start + group]

            df_batch = extract_vessel_positions(
                session=session,
                vessel_ids_short=[vid[:16] for vid in batch_ids],
                start_timestamp=start_timestamp,
                end_timestamp=end_timestamp,
                interval=interval,
                timeout=timeout,
            )
            if not df_batch.empty:
                batches.append(df_batch)

            if verbose:
                done = min(start + group, total)
                print(f"{done} vessels done out of {total}")

    if not batches:
        return pd.DataFrame()
    return pd.concat(batches, ignore_index=True)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def build_daily_positions_from_imos(imo_dict, time_min, time_max, interval="1d", group=20, verbose=True):
    """Pull signals positions for a fleet given by IMO and reduce them to one per day.

    Steps: resolve IMOs to vessel ids, fetch positions in batches, join the
    positions back to the caller's vessel names via the first 16 characters
    of the vessel id, then keep the earliest row per vessel and UTC day.

    Returns:
        ``(daily_long, lat_wide, lon_wide)``:
        * ``daily_long`` - one row per vessel/day with ``date``, ``timestamp``
          (set equal to ``date``), ``lat``, ``lon`` and the identifying columns.
        * ``lat_wide`` / ``lon_wide`` - date x vessel-name pivots of latitude
          and longitude.

    Raises:
        ValueError: if the signals call returns nothing, or none of the
            returned positions belong to the requested vessels. Errors from
            ``map_imos_to_vortexa_ids`` propagate unchanged.
    """
    mapping_df = map_imos_to_vortexa_ids(imo_dict)

    positions = fetch_signals_for_fleet(
        vessel_ids=mapping_df["vessel_id"].tolist(),
        start_timestamp=time_min,
        end_timestamp=time_max,
        interval=interval,
        group=group,
        verbose=verbose,
    )
    if positions.empty:
        raise ValueError("Signals API returned no data for the selected vessels/time window.")

    positions = positions.copy()
    positions["timestamp"] = pd.to_datetime(positions["timestamp"], utc=True)
    positions["date"] = positions["timestamp"].dt.floor("D")
    positions["vessel_id_short"] = positions["vessel_id"].str[:16]

    # rename() returns a new frame, so the caller-visible mapping is untouched.
    lookup = mapping_df.rename(columns={"vessel_id": "vortexa_vessel_id"})
    lookup["vessel_id_short"] = lookup["vortexa_vessel_id"].str[:16]

    merged = positions.merge(
        lookup[["user_vessel_name", "imo", "vortexa_vessel_id", "vessel_id_short"]],
        on="vessel_id_short",
        how="left",
    )
    # Drop positions of vessels that were not requested.
    merged = merged[merged["user_vessel_name"].notna()]

    if merged.empty:
        raise ValueError("Signals returned positions but none matched provided IMOs.")

    # Keep the earliest *row* per vessel/day. GroupBy.first() would instead
    # take the first non-null value of each column independently, which can
    # pair a latitude and a longitude taken from different timestamps.
    vessel_day = ["user_vessel_name", "imo", "vortexa_vessel_id", "vessel_id_short", "date"]
    daily = (
        merged
        .sort_values(["user_vessel_name", "imo", "vessel_id_short", "date", "timestamp"])
        .drop_duplicates(subset=vessel_day, keep="first")
        .reset_index(drop=True)
    )

    lat_wide = daily.pivot(index="date", columns="user_vessel_name", values="lat").sort_index()
    lon_wide = daily.pivot(index="date", columns="user_vessel_name", values="lon").sort_index()

    # The long table reports the day itself as the timestamp.
    daily["timestamp"] = daily["date"]
    daily_long = daily[
        ["date", "timestamp", "lat", "lon", "user_vessel_name", "imo",
         "vortexa_vessel_id", "vessel_id_short"]
    ]

    return daily_long, lat_wide, lon_wide
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def build_latlon_timeseries_table(lat_wide, lon_wide):
    """Merge the latitude and longitude pivots into one flat table.

    Each vessel column becomes ``<name>_lat`` / ``<name>_lon``; the columns
    are sorted by name so a vessel's two columns sit side by side, and the
    index is turned into a leading column (renamed to ``Date`` when the
    index is called ``date``).
    """
    suffixed = [
        frame.rename_axis(columns=None).add_suffix(suffix)
        for frame, suffix in ((lat_wide, "_lat"), (lon_wide, "_lon"))
    ]
    combined = pd.concat(suffixed, axis=1).sort_index(axis=1)
    return combined.reset_index().rename(columns={"date": "Date"})
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def track_vessels_daily(
    imo_dict, time_min, time_max,
    interval="1d", group=20,
):
    """Run the IMO -> signals positions -> daily tables pipeline end to end.

    Thin wrapper around ``build_daily_positions_from_imos`` (always called
    with ``verbose=True``) that returns its three tables in a dict keyed
    ``"daily_long"``, ``"lat_wide"`` and ``"lon_wide"``.
    """
    daily_long, lat_wide, lon_wide = build_daily_positions_from_imos(
        imo_dict,
        time_min,
        time_max,
        interval=interval,
        group=group,
        verbose=True,
    )
    return dict(daily_long=daily_long, lat_wide=lat_wide, lon_wide=lon_wide)
|