pyvolca 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyvolca-0.1.0.dist-info/METADATA +48 -0
- pyvolca-0.1.0.dist-info/RECORD +11 -0
- pyvolca-0.1.0.dist-info/WHEEL +5 -0
- pyvolca-0.1.0.dist-info/licenses/LICENSE +21 -0
- pyvolca-0.1.0.dist-info/top_level.txt +1 -0
- volca/__init__.py +6 -0
- volca/analysis.py +269 -0
- volca/classify.py +65 -0
- volca/client.py +219 -0
- volca/server.py +132 -0
- volca/types.py +83 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pyvolca
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python client for VoLCA — Life Cycle Assessment engine
|
|
5
|
+
Author-email: Christophe Combelles <ccomb@free.fr>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/ccomb/volca/tree/main/pyvolca
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Topic :: Scientific/Engineering
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
Requires-Dist: requests>=2.28
|
|
14
|
+
Provides-Extra: dev
|
|
15
|
+
Requires-Dist: pytest; extra == "dev"
|
|
16
|
+
Dynamic: license-file
|
|
17
|
+
|
|
18
|
+
# pyvolca
|
|
19
|
+
|
|
20
|
+
Python client for [VoLCA](https://github.com/ccomb/volca), a Life Cycle Assessment engine.
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install pyvolca
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Quick start
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from volca import Client
|
|
32
|
+
|
|
33
|
+
c = Client(db="agribalyse-3.2", password="1234")
|
|
34
|
+
|
|
35
|
+
# Search activities
|
|
36
|
+
plants = c.search_activities(name="at plant")
|
|
37
|
+
|
|
38
|
+
# Supply chain
|
|
39
|
+
chain = c.get_supply_chain(plants[0].process_id, name="at farm")
|
|
40
|
+
|
|
41
|
+
# LCIA with substitutions (Sherman-Morrison rank-1 updates)
|
|
42
|
+
subs = [{"from": old_pid, "to": new_pid, "consumer": consumer_pid}]
|
|
43
|
+
result = c.get_lcia(pid, method_id, substitutions=subs)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## License
|
|
47
|
+
|
|
48
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
pyvolca-0.1.0.dist-info/licenses/LICENSE,sha256=WeMtBxEZUylxEcqik6gaWgAdsGripQ-c_cXTwiqDjeU,1082
|
|
2
|
+
volca/__init__.py,sha256=yDMJFF7TmK7V6UGogNFYkYyjBIkoqyS_ECXV9VFE9Rs,147
|
|
3
|
+
volca/analysis.py,sha256=H9TStxiaS62gGivDcWidjbBdiia16LN1xTZFbc--S7k,8423
|
|
4
|
+
volca/classify.py,sha256=KKhw-4P-NhTGeVmGiAv0hbBcMmNCbKzJ6phPcWGLWLs,2605
|
|
5
|
+
volca/client.py,sha256=QTTZXgFAOSWFACl3BzQNwxHjjWV2S_Tb9qzkxT14WPQ,7537
|
|
6
|
+
volca/server.py,sha256=EHExCR6RvqCjJMKtfqmcJqDOIzMvYEu7VBRpGwovuZ8,3919
|
|
7
|
+
volca/types.py,sha256=fstNKKPxh53k9_vWA0A9t3Y_ksxmLt6DJIgFq1NO-Aw,2136
|
|
8
|
+
pyvolca-0.1.0.dist-info/METADATA,sha256=G4WTjA0Wg7CJ43LCc32CU2lC-z64g5oXoU2251Ulhy4,1154
|
|
9
|
+
pyvolca-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
10
|
+
pyvolca-0.1.0.dist-info/top_level.txt,sha256=apMTTyWRE0IWimyd2DvNqN3yLakBs82TPxSYCz5CGJI,6
|
|
11
|
+
pyvolca-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024-2026 Christophe Combelles
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
volca
|
volca/__init__.py
ADDED
volca/analysis.py
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Supply chain analysis helpers.
|
|
2
|
+
|
|
3
|
+
Provides both generic classification-based grouping (using structured data
|
|
4
|
+
from the database) and legacy semantic matching for databases without
|
|
5
|
+
classification metadata.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
|
|
10
|
+
from .classify import SemanticClassifier
|
|
11
|
+
from .client import Client
|
|
12
|
+
from .types import SupplyChainEntry
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def group_supply_chain_by_classification(
    client: Client, process_id: str, prefix: str | None = None, limit: int = 1000
) -> dict[str, list[SupplyChainEntry]]:
    """Bucket the supply chain of ``process_id`` by "Category" classification.

    The supply chain is fetched through ``client.get_supply_chain`` with the
    given ``limit``. Each entry is filed under the value of its "Category"
    classification; entries with a missing or empty value are filed under
    "(unclassified)".

    When ``prefix`` is given (e.g. 'Agricultural\\Animal feed'), only
    categories starting with that prefix are kept.

    Returns a dict mapping category label to the entries carrying it.
    """
    supply_chain = client.get_supply_chain(process_id, limit=limit)
    by_category: dict[str, list[SupplyChainEntry]] = {}
    for item in supply_chain.entries:
        label = item.classifications.get("Category", "") or "(unclassified)"
        # The prefix filter is applied after the "(unclassified)" fallback,
        # so unclassified entries are dropped unless the prefix matches it.
        if not prefix or label.startswith(prefix):
            by_category.setdefault(label, []).append(item)
    return by_category
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# -- Legacy semantic matching (for databases without classification data) --
|
|
36
|
+
|
|
37
|
+
# Reference phrases for semantic matching
|
|
38
|
+
# Reference phrases that find_animal_candidates ranks supply-chain names against.
ANIMAL_PHRASES = [
    "live animal at farm gate",
    "livestock for slaughter at farm",
]

# Reference phrases that get_feed_inputs uses to keep only feed-like inputs.
FEED_PHRASES = [
    "animal feed ingredient for livestock",
]

# Category labels that get_feed_inputs assigns to each retained feed input.
FEED_CATEGORIES = [
    "grazed grass/pasture",
    "hay/dried forage",
    "silage maize",
    "cereal grain",
    "soybean meal",
    "rapeseed meal",
    "sunflower meal",
    "compound feed/premix",
    "milk for calf",
    "amino acids/additives",
    "DDGS/co-products",
]
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# -- Exchange accessors (handle the nested API response structure) --
|
|
63
|
+
|
|
64
|
+
def _exch_name(inp: dict) -> str:
|
|
65
|
+
return inp.get("edFlow", {}).get("flowName", "")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _exch_amount(inp: dict) -> float:
|
|
69
|
+
exch = inp.get("edExchange", {})
|
|
70
|
+
return abs(float(exch.get("techAmount", 0) or exch.get("bioAmount", 0)))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _exch_unit(inp: dict) -> str:
|
|
74
|
+
return inp.get("edExchangeUnitName", "")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _exch_linked_id(inp: dict) -> str:
|
|
78
|
+
target = inp.get("edTargetActivity")
|
|
79
|
+
return target.get("prsId", "") if target else ""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass
class AnimalCandidate:
    """A supply-chain entry ranked as a possible live-animal process.

    Instances are built by ``find_animal_candidates`` from a supply-chain
    entry plus the similarity score returned by the classifier.
    """

    process_id: str  # process id of the supply-chain entry
    name: str  # entry name that was scored
    location: str  # location copied from the entry
    score: float  # similarity score assigned by the classifier
    quantity: float  # quantity copied from the entry
    unit: str  # unit copied from the entry
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class FeedInput:
    """One feed-like input of an animal process, with its assigned category."""

    name: str  # flow name of the input exchange
    category: str  # closest entry of FEED_CATEGORIES
    category_score: float  # similarity score of that category assignment
    amount: float  # exchange amount; analyze_animal_feed later normalizes it
    unit: str  # unit name of the exchange
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class SubAnimal:
    """An animal (or sub-animal) process together with its feed inputs."""

    process_id: str  # id of the animal process the feeds belong to
    name: str  # exchange name, or "(direct)" for a non-aggregated animal
    amount: float  # exchange amount, or the reference amount when direct
    unit: str  # unit of `amount`
    # Each instance gets its own list (default_factory avoids sharing one).
    feeds: list[FeedInput] = field(default_factory=list)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def find_animal_candidates(
    client: Client,
    process_id: str,
    classifier: SemanticClassifier,
    top_k: int = 5,
) -> list[AnimalCandidate]:
    """Rank supply-chain entries of a product by how animal-like they look.

    Up to 1000 supply-chain entries are fetched. Entries whose name contains
    "at farm" and none of a small set of excluded words are kept; if that
    leaves nothing, the whole chain is used instead. The surviving names are
    ranked by the classifier against ``ANIMAL_PHRASES`` and the best
    ``top_k`` are returned as ``AnimalCandidate`` objects.
    """
    supply_chain = client.get_supply_chain(process_id, limit=1000)
    blocked_words = {"feed", "slaughter", "processing", "manure", "housing", "emission"}

    def is_farm_process(entry) -> bool:
        # Structural pre-filter: "at farm" present, no blocked word anywhere.
        if "at farm" not in entry.name.lower():
            return False
        return all(word not in entry.name.lower() for word in blocked_words)

    pool = [entry for entry in supply_chain.entries if is_farm_process(entry)]
    if not pool:
        # Nothing passed the pre-filter: fall back to ranking the full chain.
        pool = supply_chain.entries

    pool_names = [entry.name for entry in pool]
    hits = classifier.rank_by_similarity(pool_names, ANIMAL_PHRASES, top_k=top_k)

    candidates = []
    for position, matched_name, similarity in hits:
        # `position` indexes into the list of names handed to the classifier.
        source = pool[position]
        candidates.append(
            AnimalCandidate(
                process_id=source.process_id,
                name=matched_name,
                location=source.location,
                score=similarity,
                quantity=source.quantity,
                unit=source.unit,
            )
        )
    return candidates
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _is_sub_animal(name: str) -> bool:
|
|
147
|
+
"""Check if an input name looks like a sub-animal process.
|
|
148
|
+
|
|
149
|
+
Sub-animals are "at farm gate" processes that are not feed or supplements.
|
|
150
|
+
We check the beginning of the name (before the first comma) to avoid
|
|
151
|
+
false exclusions on names like "Cull cow, ..., silage maize 30%, at farm".
|
|
152
|
+
"""
|
|
153
|
+
low = name.lower()
|
|
154
|
+
if "at farm" not in low:
|
|
155
|
+
return False
|
|
156
|
+
# Check only the first part of the name (the main subject)
|
|
157
|
+
subject = low.split(",")[0].strip()
|
|
158
|
+
feed_terms = {"feed", "grain", "straw", "meal", "silage", "hay", "oil",
|
|
159
|
+
"lysine", "methionine", "tryptophane", "ddgs", "seed",
|
|
160
|
+
"reproductive", "alfalfa", "rapeseed", "soybean", "sunflower",
|
|
161
|
+
"wheat", "maize", "barley"}
|
|
162
|
+
return not any(w in subject for w in feed_terms)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def detect_aggregation(client: Client, animal_id: str) -> tuple[bool, list[dict]]:
    """Tell whether an animal process aggregates several sub-animal processes.

    Only inputs linked to a target activity are considered. With at most one
    linked input the process is reported as not aggregated and the linked
    inputs are returned as-is. Otherwise the linked inputs measured in "kg"
    whose name passes ``_is_sub_animal`` are collected; the process counts as
    aggregated when more than one such input exists.

    Returns ``(is_aggregated, inputs)`` where ``inputs`` is the list described
    above for the branch taken.
    """
    linked_inputs = [
        exchange for exchange in client.get_inputs(animal_id) if _exch_linked_id(exchange)
    ]
    if len(linked_inputs) <= 1:
        return False, linked_inputs

    def is_candidate(exchange: dict) -> bool:
        return _exch_unit(exchange) == "kg" and _is_sub_animal(_exch_name(exchange))

    candidates = list(filter(is_candidate, linked_inputs))
    return len(candidates) > 1, candidates
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def get_feed_inputs(
    client: Client,
    animal_id: str,
    classifier: SemanticClassifier,
    threshold: float = 0.4,
) -> list[FeedInput]:
    """Return the feed-like inputs of one animal process, each with a category.

    Inputs are first narrowed to those whose name scores at least
    ``threshold`` against ``FEED_PHRASES``. The survivors, kept in their
    original input order, are then assigned the closest entry of
    ``FEED_CATEGORIES``. An empty list is returned when the process has no
    inputs or none of them passes the threshold.
    """
    exchanges = client.get_inputs(animal_id)
    labels = [_exch_name(exchange) for exchange in exchanges]
    if not labels:
        return []

    matches = classifier.filter_by_similarity(labels, FEED_PHRASES, threshold)
    if not matches:
        return []

    # Matches arrive sorted by score; restore the original input order.
    kept_positions = sorted({position for position, _, _ in matches})
    kept_exchanges = [exchanges[position] for position in kept_positions]
    kept_labels = [labels[position] for position in kept_positions]

    verdicts = classifier.classify(kept_labels, FEED_CATEGORIES)
    feeds = []
    for slot, (label, category, score) in enumerate(verdicts):
        # classify() preserves input order, so slot lines up with kept_exchanges.
        raw = kept_exchanges[slot]
        feeds.append(
            FeedInput(
                name=label,
                category=category,
                category_score=score,
                amount=_exch_amount(raw),
                unit=_exch_unit(raw),
            )
        )
    return feeds
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def get_reference_amount(client: Client, process_id: str) -> float:
    """Return the reference product amount used for normalization.

    The amount is ``prsProductAmount`` of the first product listed under
    ``piActivity.pfaAllProducts``; when no product is listed it is read from
    the top level of the activity response. Defaults to 1.0 if absent.
    """
    details = client.get_activity(process_id)
    listed_products = details.get("piActivity", {}).get("pfaAllProducts", [])
    holder = listed_products[0] if listed_products else details
    return float(holder.get("prsProductAmount", 1.0))
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def analyze_animal_feed(
    client: Client,
    animal_id: str,
    classifier: SemanticClassifier,
    feed_threshold: float = 0.4,
) -> tuple[list[SubAnimal], float]:
    """Collect feed inputs for an animal process and its sub-animals, if any.

    When ``detect_aggregation`` reports an aggregated process, one
    ``SubAnimal`` is produced per sub-animal input, with feed amounts divided
    by that sub-animal's own reference amount. Otherwise a single
    ``SubAnimal`` named "(direct)" is produced from the process's own inputs,
    with feed amounts divided by the process's reference amount. Amounts are
    left untouched when the relevant reference amount is not positive.

    Returns ``(sub_animals_with_feeds, reference_amount)`` where
    ``reference_amount`` is that of ``animal_id``.
    """
    ref_amount = get_reference_amount(client, animal_id)
    is_agg, sub_inputs = detect_aggregation(client, animal_id)

    def feeds_per_unit(target_id: str, reference: float) -> list[FeedInput]:
        # Fetch feeds of target_id and scale them to one unit of its output.
        found = get_feed_inputs(client, target_id, classifier, feed_threshold)
        if reference > 0:
            for feed in found:
                feed.amount = feed.amount / reference
        return found

    if not is_agg:
        # Single animal: feeds are direct inputs of the process itself.
        direct = SubAnimal(
            process_id=animal_id,
            name="(direct)",
            amount=ref_amount,
            unit="kg",
            feeds=feeds_per_unit(animal_id, ref_amount),
        )
        return [direct], ref_amount

    herd = []
    for exchange in sub_inputs:
        sub_id = _exch_linked_id(exchange)
        sub_reference = get_reference_amount(client, sub_id)
        herd.append(
            SubAnimal(
                process_id=sub_id,
                name=_exch_name(exchange),
                amount=_exch_amount(exchange),
                unit=_exch_unit(exchange),
                feeds=feeds_per_unit(sub_id, sub_reference),
            )
        )
    return herd, ref_amount
|
volca/classify.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Semantic classification using sentence-transformers embeddings."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
from sentence_transformers import SentenceTransformer
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SemanticClassifier:
    """Classify and rank text strings by cosine similarity to reference phrases.

    Embeddings come from a sentence-transformers model (all-mpnet-base-v2 by
    default). A single model instance handles all classification tasks.
    """

    def __init__(self, model_name: str = "all-mpnet-base-v2"):
        """Load the sentence-transformers model called ``model_name``."""
        self._model = SentenceTransformer(model_name)

    def _cosine_similarity(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Cosine similarity between row vectors of a and b.

        Returns a matrix with one row per row of ``a`` and one column per row
        of ``b``. A zero-length row has similarity 0 to everything instead of
        producing NaN through a division by zero.
        """
        a_len = np.linalg.norm(a, axis=1, keepdims=True)
        b_len = np.linalg.norm(b, axis=1, keepdims=True)
        # Dividing a zero vector by 1 keeps it zero, so its dot products are 0.
        a_unit = a / np.where(a_len == 0, 1, a_len)
        b_unit = b / np.where(b_len == 0, 1, b_len)
        return a_unit @ b_unit.T

    def rank_by_similarity(
        self, names: list[str], reference_phrases: list[str], top_k: int = 5
    ) -> list[tuple[int, str, float]]:
        """Rank names by max similarity to any reference phrase.

        Returns at most ``top_k`` tuples of (original_index, name, score)
        sorted by score descending. An empty ``names`` list yields [].

        Raises:
            ValueError: if ``reference_phrases`` is empty.
        """
        if not names:
            return []
        if not reference_phrases:
            raise ValueError("reference_phrases must not be empty")
        name_emb = self._model.encode(names)
        ref_emb = self._model.encode(reference_phrases)
        sims = self._cosine_similarity(name_emb, ref_emb)
        # Score of a name = its best match among the reference phrases.
        max_scores = sims.max(axis=1)
        ranked_indices = np.argsort(-max_scores)[:top_k]
        return [(int(i), names[i], float(max_scores[i])) for i in ranked_indices]

    def classify(
        self, names: list[str], categories: list[str]
    ) -> list[tuple[str, str, float]]:
        """Assign each name to its closest category.

        Returns list of (name, category, score) preserving input order. An
        empty ``names`` list yields [].

        Raises:
            ValueError: if ``categories`` is empty.
        """
        if not names:
            return []
        if not categories:
            raise ValueError("categories must not be empty")
        name_emb = self._model.encode(names)
        cat_emb = self._model.encode(categories)
        sims = self._cosine_similarity(name_emb, cat_emb)
        best_cat = sims.argmax(axis=1)
        return [
            (name, categories[int(best_cat[i])], float(sims[i, best_cat[i]]))
            for i, name in enumerate(names)
        ]

    def filter_by_similarity(
        self, names: list[str], reference_phrases: list[str], threshold: float = 0.3
    ) -> list[tuple[int, str, float]]:
        """Return names whose best reference similarity is at least ``threshold``.

        Returns list of (original_index, name, score) sorted by score descending.
        """
        ranked = self.rank_by_similarity(names, reference_phrases, top_k=len(names))
        return [(i, n, s) for i, n, s in ranked if s >= threshold]
|
volca/client.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
"""HTTP client for all VoLCA API endpoints."""
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
from .types import Activity, SupplyChain
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _substitution_body(substitutions: list[dict]) -> dict:
|
|
9
|
+
"""Build request body for substitution endpoints."""
|
|
10
|
+
return {
|
|
11
|
+
"srSubstitutions": [
|
|
12
|
+
{"subFrom": s["from"], "subTo": s["to"], "subConsumer": s["consumer"]}
|
|
13
|
+
for s in substitutions
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Client:
    """HTTP client for the VoLCA REST API.

    Usage::

        c = Client(db="agribalyse-3.2", password="1234")
        plants = c.search_activities(name="at plant")
        chain = c.get_supply_chain(plants[0].process_id, name="at farm")

    get_supply_chain, get_inventory, get_lcia and get_lcia_batch accept an
    optional list of substitutions; when given, the request is sent as a POST
    carrying the substitutions instead of a plain GET::

        subs = [{"from": old_pid, "to": new_pid, "consumer": consumer_pid}]
        result = c.get_lcia(pid, method_id, substitutions=subs)
    """

    def __init__(self, base_url: str = "http://localhost:8081", db: str = "", password: str = ""):
        """Create a client for ``base_url`` targeting database ``db``.

        A non-empty ``password`` is sent as a Bearer token on every request.
        """
        self.base_url = base_url.rstrip("/")
        self.db = db
        self._session = requests.Session()
        if password:
            self._session.headers["Authorization"] = f"Bearer {password}"

    # -- URL and response helpers --

    def _db_url(self, path: str) -> str:
        """Build the URL of a database-scoped endpoint."""
        return f"{self.base_url}/api/v1/db/{self.db}/{path}"

    def _api_url(self, path: str) -> str:
        """Build the URL of a server-wide endpoint."""
        return f"{self.base_url}/api/v1/{path}"

    @staticmethod
    def _checked_json(response):
        """Raise on an HTTP error status, otherwise return the decoded JSON."""
        response.raise_for_status()
        return response.json()

    def _get_or_post(self, url: str, substitutions: list[dict] | None, **extra):
        """POST the substitutions to ``url`` if any were given, else GET it."""
        if substitutions:
            return self._session.post(url, json=_substitution_body(substitutions), **extra)
        return self._session.get(url, **extra)

    def use(self, db_name: str) -> "Client":
        """Return a new client targeting a different database (shares session)."""
        # __new__ skips __init__ so that no second requests.Session is created.
        clone = Client.__new__(Client)
        clone.base_url = self.base_url
        clone.db = db_name
        clone._session = self._session
        return clone

    # -- Server info --

    def get_version(self) -> dict:
        """Return server version info (version, gitHash, gitTag, buildTarget)."""
        return self._checked_json(self._session.get(self._api_url("version")))

    # -- Database management --

    def list_databases(self) -> list[dict]:
        """Return the "dlrDatabases" list reported by the server."""
        return self._checked_json(self._session.get(self._api_url("db")))["dlrDatabases"]

    def load_database(self, db_name: str) -> dict:
        """Ask the server to load ``db_name`` and return its JSON reply."""
        return self._checked_json(self._session.post(self._api_url(f"db/{db_name}/load")))

    def unload_database(self, db_name: str) -> dict:
        """Ask the server to unload ``db_name`` and return its JSON reply."""
        return self._checked_json(self._session.post(self._api_url(f"db/{db_name}/unload")))

    # -- Search --

    def search_activities(
        self,
        name: str | None = None,
        geo: str | None = None,
        product: str | None = None,
        classification: str | None = None,
        classification_value: str | None = None,
        limit: int | None = None,
        offset: int = 0,
    ) -> list[Activity]:
        """Search activities of the current database.

        Only filters with a truthy value are sent; ``offset`` is always sent
        and ``limit`` is sent whenever it is not None.
        """
        params: dict = {"offset": offset}
        if limit is not None:
            params["limit"] = limit
        optional_filters = (
            ("name", name),
            ("geo", geo),
            ("product", product),
            ("classification", classification),
            ("classification-value", classification_value),
        )
        params.update((key, value) for key, value in optional_filters if value)
        found = self._checked_json(self._session.get(self._db_url("activities"), params=params))
        return [Activity.from_json(item) for item in found["srResults"]]

    def get_classifications(self) -> list[dict]:
        """List all classification systems and their values for the current database."""
        return self._checked_json(self._session.get(self._db_url("classifications")))

    def search_flows(self, query: str | None = None, limit: int | None = None) -> list[dict]:
        """Search flows of the current database and return the raw result dicts."""
        params: dict = {}
        if limit is not None:
            params["limit"] = limit
        if query:
            params["q"] = query
        found = self._checked_json(self._session.get(self._db_url("flows"), params=params))
        return found["srResults"]

    # -- Activity details --

    def get_activity(self, process_id: str) -> dict:
        """Return the raw JSON description of one activity."""
        return self._checked_json(self._session.get(self._db_url(f"activity/{process_id}")))

    def get_inputs(self, process_id: str) -> list[dict]:
        """Return the raw input exchanges of one activity."""
        return self._checked_json(self._session.get(self._db_url(f"activity/{process_id}/inputs")))

    def get_outputs(self, process_id: str) -> list[dict]:
        """Return the raw output exchanges of one activity."""
        return self._checked_json(self._session.get(self._db_url(f"activity/{process_id}/outputs")))

    # -- Supply chain (scaling vector based) --

    def get_supply_chain(
        self,
        process_id: str,
        name: str | None = None,
        limit: int | None = None,
        min_quantity: float = 0,
        substitutions: list[dict] | None = None,
    ) -> SupplyChain:
        """Fetch the supply chain of ``process_id``.

        ``name``, ``limit`` and a positive ``min_quantity`` are sent as query
        parameters. With ``substitutions`` the request becomes a POST.
        """
        params: dict = {}
        if limit is not None:
            params["limit"] = limit
        if name:
            params["name"] = name
        if min_quantity > 0:
            params["min-quantity"] = min_quantity
        url = self._db_url(f"activity/{process_id}/supply-chain")
        response = self._get_or_post(url, substitutions, params=params)
        return SupplyChain.from_json(self._checked_json(response))

    # -- Consumers (reverse supply chain) --

    def get_consumers(
        self,
        process_id: str,
        name: str | None = None,
        limit: int | None = None,
    ) -> list[Activity]:
        """Find all activities that transitively depend on this supplier."""
        params: dict = {}
        if name:
            params["name"] = name
        if limit is not None:
            params["limit"] = limit
        url = self._db_url(f"activity/{process_id}/consumers")
        found = self._checked_json(self._session.get(url, params=params))
        return [Activity.from_json(item) for item in found]

    # -- Tree --

    def get_tree(self, process_id: str) -> dict:
        """Return the raw JSON tree of one activity."""
        return self._checked_json(self._session.get(self._db_url(f"activity/{process_id}/tree")))

    # -- Inventory & LCIA --

    def get_inventory(self, process_id: str, substitutions: list[dict] | None = None) -> dict:
        """Return the inventory of ``process_id``, optionally with substitutions."""
        url = self._db_url(f"activity/{process_id}/inventory")
        return self._checked_json(self._get_or_post(url, substitutions))

    def get_lcia(self, process_id: str, method_id: str, substitutions: list[dict] | None = None) -> dict:
        """Return the LCIA result of ``process_id`` for one method."""
        url = self._db_url(f"activity/{process_id}/lcia/{method_id}")
        return self._checked_json(self._get_or_post(url, substitutions))

    def get_lcia_batch(self, process_id: str, collection: str, substitutions: list[dict] | None = None) -> dict:
        """Return the LCIA results of ``process_id`` for a method collection."""
        url = self._db_url(f"activity/{process_id}/lcia-batch/{collection}")
        return self._checked_json(self._get_or_post(url, substitutions))
|
volca/server.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Server lifecycle management for VoLCA."""
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
import subprocess
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import requests
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import tomllib # Python 3.11+
|
|
12
|
+
except ModuleNotFoundError:
|
|
13
|
+
import tomli as tomllib # type: ignore[no-redef]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Server:
    """Manages the VoLCA server process.

    Usage::

        with Server(config="volca.toml") as srv:
            client = Client(base_url=srv.base_url, db="agribalyse-3.2", password=srv.password)
            activities = client.search_activities(name="at plant")
    """

    def __init__(self, config: str = "volca.toml", port: int = 0, binary: str = "volca"):
        """Remember the launch settings and read port/password from the config.

        ``port`` overrides the ``[server] port`` config value when non-zero;
        8081 is used when neither is set. The password comes from
        ``[server] password`` ("" when absent).
        """
        self.config = config
        self.binary = binary
        self._process: subprocess.Popen | None = None

        # Read port and password from config
        cfg = self._read_config()
        server_cfg = cfg.get("server", {})
        self.port = port or server_cfg.get("port", 8081)
        self.password = server_cfg.get("password", "")

    @property
    def base_url(self) -> str:
        """Base URL of the server on localhost."""
        return f"http://localhost:{self.port}"

    def _read_config(self) -> dict:
        """Read the TOML config file; a missing file yields an empty dict."""
        try:
            with open(self.config, "rb") as f:
                return tomllib.load(f)
        except FileNotFoundError:
            return {}

    def _auth_headers(self) -> dict:
        """Return the Bearer authorization header, or {} without a password."""
        if self.password:
            return {"Authorization": f"Bearer {self.password}"}
        return {}

    def _find_binary(self) -> str:
        """Find the volca binary: explicit path, PATH, or common local spots.

        Only regular files are accepted, so a directory that happens to share
        the binary's name (e.g. a ``volca/`` package directory) is skipped.
        An explicit path is returned resolved to an absolute path, because a
        bare name handed to ``subprocess.Popen`` would be looked up on PATH
        rather than in the current directory.

        Raises:
            FileNotFoundError: if no candidate exists.
        """
        explicit = Path(self.binary)
        if explicit.is_file():
            return str(explicit.resolve())
        found = shutil.which(self.binary)
        if found:
            return found
        # Try common locations
        for candidate in ("./volca", "./dist/volca"):
            if Path(candidate).is_file():
                return candidate
        raise FileNotFoundError(
            f"Cannot find '{self.binary}' binary. "
            "Set binary= parameter or add volca to PATH."
        )

    def is_alive(self) -> bool:
        """Health check — GET /api/v1/db, return True if 200.

        Any request failure (refused connection, timeout, ...) counts as
        "not alive" so that callers polling for readiness keep polling.
        """
        try:
            r = requests.get(
                f"{self.base_url}/api/v1/db",
                headers=self._auth_headers(),
                timeout=2,
            )
        except requests.RequestException:
            return False
        return r.status_code == 200

    def _reap_process(self) -> None:
        """Terminate the child process started by start(), killing it if needed."""
        process, self._process = self._process, None
        if process is None:
            return
        process.terminate()
        try:
            process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            # The child ignored the terminate request: force it down.
            process.kill()
            process.wait()

    def start(self, idle_timeout: int = 300, wait_timeout: int = 120) -> None:
        """Start server if not running. Wait until ready.

        Raises:
            FileNotFoundError: if the binary cannot be located.
            TimeoutError: if the server is not ready within ``wait_timeout``
                seconds; the spawned process is terminated first.
        """
        if self.is_alive():
            return

        binary = self._find_binary()
        cmd = [
            binary,
            "--config", self.config,
            "server",
            "--port", str(self.port),
            "--idle-timeout", str(idle_timeout),
        ]
        self._process = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Poll until server is ready
        deadline = time.monotonic() + wait_timeout
        while time.monotonic() < deadline:
            if self.is_alive():
                return
            time.sleep(0.5)

        # Do not leave an orphaned child behind when giving up.
        self._reap_process()
        raise TimeoutError(
            f"Server did not become ready within {wait_timeout}s"
        )

    def stop(self) -> None:
        """Stop the server via shutdown endpoint, then terminate process."""
        try:
            requests.post(
                f"{self.base_url}/api/v1/shutdown",
                headers=self._auth_headers(),
                timeout=5,
            )
        except requests.RequestException:
            # Best effort: the process (if ours) is terminated below anyway.
            pass
        self._reap_process()

    def __enter__(self) -> "Server":
        self.start()
        return self

    def __exit__(self, *_) -> None:
        self.stop()
|
volca/types.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Data types for VoLCA API responses."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class Activity:
    """An activity as described by the server's ``prs*`` JSON fields."""

    process_id: str
    name: str
    location: str
    product: str
    product_amount: float
    product_unit: str

    @classmethod
    def from_json(cls, d: dict) -> "Activity":
        """Build an Activity from a JSON dict; every listed key is required."""
        json_keys = (
            ("process_id", "prsId"),
            ("name", "prsName"),
            ("location", "prsLocation"),
            ("product", "prsProduct"),
            ("product_amount", "prsProductAmount"),
            ("product_unit", "prsProductUnit"),
        )
        return cls(**{attr: d[key] for attr, key in json_keys})
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class SupplyChainEntry:
    """One activity of a supply chain, read from the ``sce*`` JSON fields."""

    process_id: str
    name: str
    location: str
    quantity: float
    unit: str
    scaling_factor: float
    # Classification system name -> value; empty when the server sends none.
    classifications: dict[str, str] = field(default_factory=dict)

    @classmethod
    def from_json(cls, d: dict) -> "SupplyChainEntry":
        """Build an entry from a JSON dict; only classifications are optional."""
        json_keys = (
            ("process_id", "sceProcessId"),
            ("name", "sceName"),
            ("location", "sceLocation"),
            ("quantity", "sceQuantity"),
            ("unit", "sceUnit"),
            ("scaling_factor", "sceScalingFactor"),
        )
        required = {attr: d[key] for attr, key in json_keys}
        return cls(**required, classifications=d.get("sceClassifications", {}))
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class SupplyChainEdge:
    """A link between two supply-chain activities with its amount."""

    from_id: str
    to_id: str
    amount: float

    @classmethod
    def from_json(cls, d: dict) -> "SupplyChainEdge":
        """Build an edge from its ``sceEdge*`` JSON fields (all required)."""
        source = d["sceEdgeFrom"]
        destination = d["sceEdgeTo"]
        weight = d["sceEdgeAmount"]
        return cls(from_id=source, to_id=destination, amount=weight)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class SupplyChain:
    """A supply-chain response: root activity, counts, entries and edges."""

    root: Activity
    total_activities: int
    filtered_activities: int
    entries: list[SupplyChainEntry] = field(default_factory=list)
    edges: list[SupplyChainEdge] = field(default_factory=list)

    @classmethod
    def from_json(cls, d: dict) -> "SupplyChain":
        """Build a SupplyChain from the ``scr*`` JSON fields.

        "scrEdges" is optional and defaults to no edges; the other keys are
        required.
        """
        root_activity = Activity.from_json(d["scrRoot"])
        total = d["scrTotalActivities"]
        filtered = d["scrFilteredActivities"]
        chain_entries = list(map(SupplyChainEntry.from_json, d["scrSupplyChain"]))
        chain_edges = list(map(SupplyChainEdge.from_json, d.get("scrEdges", [])))
        return cls(
            root=root_activity,
            total_activities=total,
            filtered_activities=filtered,
            entries=chain_entries,
            edges=chain_edges,
        )
|
|
82
|
+
|
|
83
|
+
|