matterviz 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/BohrAtom.svelte +105 -0
- package/dist/BohrAtom.svelte.d.ts +21 -0
- package/dist/ControlPanel.svelte +158 -0
- package/dist/ControlPanel.svelte.d.ts +18 -0
- package/dist/Icon.svelte +23 -0
- package/dist/Icon.svelte.d.ts +8 -0
- package/dist/InfoCard.svelte +79 -0
- package/dist/InfoCard.svelte.d.ts +23 -0
- package/dist/Nucleus.svelte +64 -0
- package/dist/Nucleus.svelte.d.ts +16 -0
- package/dist/Spinner.svelte +44 -0
- package/dist/Spinner.svelte.d.ts +7 -0
- package/dist/api.d.ts +6 -0
- package/dist/api.js +30 -0
- package/dist/colors/alloy-colors.json +111 -0
- package/dist/colors/dark-mode-colors.json +111 -0
- package/dist/colors/index.d.ts +26 -0
- package/dist/colors/index.js +72 -0
- package/dist/colors/jmol-colors.json +111 -0
- package/dist/colors/muted-colors.json +111 -0
- package/dist/colors/pastel-colors.json +111 -0
- package/dist/colors/vesta-colors.json +111 -0
- package/dist/composition/BarChart.svelte +260 -0
- package/dist/composition/BarChart.svelte.d.ts +33 -0
- package/dist/composition/BubbleChart.svelte +166 -0
- package/dist/composition/BubbleChart.svelte.d.ts +30 -0
- package/dist/composition/Composition.svelte +73 -0
- package/dist/composition/Composition.svelte.d.ts +27 -0
- package/dist/composition/PieChart.svelte +236 -0
- package/dist/composition/PieChart.svelte.d.ts +36 -0
- package/dist/composition/index.d.ts +5 -0
- package/dist/composition/index.js +5 -0
- package/dist/composition/parse.d.ts +14 -0
- package/dist/composition/parse.js +307 -0
- package/dist/element/ElementHeading.svelte +21 -0
- package/dist/element/ElementHeading.svelte.d.ts +8 -0
- package/dist/element/ElementPhoto.svelte +56 -0
- package/dist/element/ElementPhoto.svelte.d.ts +9 -0
- package/dist/element/ElementStats.svelte +73 -0
- package/dist/element/ElementStats.svelte.d.ts +8 -0
- package/dist/element/ElementTile.svelte +449 -0
- package/dist/element/ElementTile.svelte.d.ts +25 -0
- package/dist/element/data.d.ts +4958 -0
- package/dist/element/data.js +5628 -0
- package/dist/element/index.d.ts +4 -0
- package/dist/element/index.js +4 -0
- package/dist/icons.d.ts +435 -0
- package/dist/icons.js +435 -0
- package/dist/index.d.ts +82 -0
- package/dist/index.js +43 -0
- package/dist/io/decompress.d.ts +16 -0
- package/dist/io/decompress.js +78 -0
- package/dist/io/export.d.ts +9 -0
- package/dist/io/export.js +205 -0
- package/dist/io/parse.d.ts +53 -0
- package/dist/io/parse.js +747 -0
- package/dist/labels.d.ts +31 -0
- package/dist/labels.js +209 -0
- package/dist/material/MaterialCard.svelte +135 -0
- package/dist/material/MaterialCard.svelte.d.ts +10 -0
- package/dist/material/SymmetryCard.svelte +23 -0
- package/dist/material/SymmetryCard.svelte.d.ts +9 -0
- package/dist/material/index.d.ts +2 -0
- package/dist/material/index.js +2 -0
- package/dist/math.d.ts +24 -0
- package/dist/math.js +216 -0
- package/dist/periodic-table/PeriodicTable.svelte +284 -0
- package/dist/periodic-table/PeriodicTable.svelte.d.ts +50 -0
- package/dist/periodic-table/PropertySelect.svelte +20 -0
- package/dist/periodic-table/PropertySelect.svelte.d.ts +13 -0
- package/dist/periodic-table/TableInset.svelte +18 -0
- package/dist/periodic-table/TableInset.svelte.d.ts +9 -0
- package/dist/periodic-table/index.d.ts +9 -0
- package/dist/periodic-table/index.js +3 -0
- package/dist/plot/ColorBar.svelte +414 -0
- package/dist/plot/ColorBar.svelte.d.ts +22 -0
- package/dist/plot/ColorScaleSelect.svelte +31 -0
- package/dist/plot/ColorScaleSelect.svelte.d.ts +15 -0
- package/dist/plot/ElementScatter.svelte +38 -0
- package/dist/plot/ElementScatter.svelte.d.ts +14 -0
- package/dist/plot/Line.svelte +42 -0
- package/dist/plot/Line.svelte.d.ts +15 -0
- package/dist/plot/PlotLegend.svelte +206 -0
- package/dist/plot/PlotLegend.svelte.d.ts +18 -0
- package/dist/plot/ScatterPlot.svelte +1753 -0
- package/dist/plot/ScatterPlot.svelte.d.ts +114 -0
- package/dist/plot/ScatterPlotControls.svelte +505 -0
- package/dist/plot/ScatterPlotControls.svelte.d.ts +33 -0
- package/dist/plot/ScatterPoint.svelte +72 -0
- package/dist/plot/ScatterPoint.svelte.d.ts +17 -0
- package/dist/plot/index.d.ts +168 -0
- package/dist/plot/index.js +46 -0
- package/dist/state.svelte.d.ts +12 -0
- package/dist/state.svelte.js +11 -0
- package/dist/structure/Bond.svelte +68 -0
- package/dist/structure/Bond.svelte.d.ts +13 -0
- package/dist/structure/Lattice.svelte +115 -0
- package/dist/structure/Lattice.svelte.d.ts +15 -0
- package/dist/structure/Structure.svelte +298 -0
- package/dist/structure/Structure.svelte.d.ts +28 -0
- package/dist/structure/StructureCard.svelte +26 -0
- package/dist/structure/StructureCard.svelte.d.ts +9 -0
- package/dist/structure/StructureControls.svelte +383 -0
- package/dist/structure/StructureControls.svelte.d.ts +23 -0
- package/dist/structure/StructureLegend.svelte +130 -0
- package/dist/structure/StructureLegend.svelte.d.ts +17 -0
- package/dist/structure/StructureScene.svelte +331 -0
- package/dist/structure/StructureScene.svelte.d.ts +47 -0
- package/dist/structure/bonding.d.ts +16 -0
- package/dist/structure/bonding.js +150 -0
- package/dist/structure/index.d.ts +98 -0
- package/dist/structure/index.js +114 -0
- package/dist/structure/pbc.d.ts +6 -0
- package/dist/structure/pbc.js +72 -0
- package/dist/trajectory/Sidebar.svelte +412 -0
- package/dist/trajectory/Sidebar.svelte.d.ts +14 -0
- package/dist/trajectory/Trajectory.svelte +1084 -0
- package/dist/trajectory/Trajectory.svelte.d.ts +49 -0
- package/dist/trajectory/TrajectoryError.svelte +120 -0
- package/dist/trajectory/TrajectoryError.svelte.d.ts +12 -0
- package/dist/trajectory/extract.d.ts +5 -0
- package/dist/trajectory/extract.js +157 -0
- package/dist/trajectory/index.d.ts +16 -0
- package/dist/trajectory/index.js +49 -0
- package/dist/trajectory/parse.d.ts +13 -0
- package/dist/trajectory/parse.js +1093 -0
- package/dist/trajectory/plotting.d.ts +12 -0
- package/dist/trajectory/plotting.js +148 -0
- package/license +21 -0
- package/package.json +131 -0
- package/readme.md +95 -0
|
@@ -0,0 +1,1093 @@
|
|
|
1
|
+
import { escape_html, is_binary } from '..';
|
|
2
|
+
import { parse_xyz } from '../io/parse';
|
|
3
|
+
import * as math from '../math';
|
|
4
|
+
import * as h5wasm from 'h5wasm';
|
|
5
|
+
// Memo table for inverted matrices, keyed by the matrix object itself (identity,
// not contents), so entries vanish once the matrix is garbage collected.
const matrix_inversion_cache = new WeakMap();

/**
 * Return the inverse of a 3x3 matrix, reusing a previously computed result when
 * the very same matrix object has been inverted before.
 *
 * @param {number[][]} matrix - Matrix to invert; also used as the cache key.
 * @returns {number[][]} Result of `math.matrix_inverse_3x3(matrix)`.
 */
function get_inverse_matrix(matrix) {
  let inverse = matrix_inversion_cache.get(matrix);
  if (!inverse) {
    // Cache miss: delegate to the shared helper and remember the result
    inverse = math.matrix_inverse_3x3(matrix);
    matrix_inversion_cache.set(matrix, inverse);
  }
  return inverse;
}
|
|
19
|
+
/**
 * Encode binary data as a base64 data URL with MIME type
 * `application/octet-stream`.
 *
 * @param {ArrayBuffer} buffer - Raw bytes to encode.
 * @returns {string} `data:application/octet-stream;base64,<payload>`.
 */
export function array_buffer_to_data_url(buffer) {
  const bytes = new Uint8Array(buffer);
  // Build the binary string chunk by chunk. Spreading the whole array into
  // String.fromCharCode passes one argument per byte and throws a RangeError
  // once the buffer exceeds the engine's argument/stack limit.
  const chunk_size = 0x8000;
  let binary_string = ``;
  for (let start = 0; start < bytes.length; start += chunk_size) {
    binary_string += String.fromCharCode(...bytes.subarray(start, start + chunk_size));
  }
  return `data:application/octet-stream;base64,${btoa(binary_string)}`;
}
|
|
25
|
+
/**
 * Decode a base64 `application/octet-stream` data URL back into raw bytes.
 * The data-URL prefix is removed if present; the remainder is treated as base64.
 *
 * @param {string} data_url - Data URL (or bare base64 string) to decode.
 * @returns {ArrayBuffer} Buffer holding the decoded bytes.
 */
export function data_url_to_array_buffer(data_url) {
  const prefix = `data:application/octet-stream;base64,`;
  const decoded = atob(data_url.replace(prefix, ``));
  // atob yields one character per byte (code units 0-255)
  return Uint8Array.from(decoded, (char) => char.charCodeAt(0)).buffer;
}
|
|
35
|
+
/**
 * Fetch a trajectory file and parse it, choosing how to read the response body
 * from the file extension and the response headers.
 *
 * - `.h5` / `.hdf5` files are read as an ArrayBuffer.
 * - Responses with `content-encoding: gzip` or a JSON content type are read as
 *   text (a trailing `.gz` is dropped from the filename).
 * - Everything else is handed to `decompress_file` before parsing.
 *
 * @param {string} url - Location of the trajectory file.
 * @returns {Promise<*>} Whatever `parse_trajectory_data` returns for the content.
 * @throws {Error} If the HTTP response is not OK.
 */
export async function load_trajectory_from_url(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch trajectory file: ${response.status}`);
  }
  const content_encoding = response.headers.get(`content-encoding`);
  const content_type = response.headers.get(`content-type`);
  // Drop any ?query / #fragment before taking the last path segment, otherwise
  // "traj.h5?token=..." is not recognised as HDF5 and ".gz" is never stripped.
  const url_path = url.split(/[?#]/)[0];
  let filename = url_path.split(`/`).pop() || `trajectory`;
  // HDF5 is binary regardless of content-encoding: always read as ArrayBuffer
  const extension = filename.toLowerCase().split(`.`).pop() || ``;
  if ([`h5`, `hdf5`].includes(extension)) {
    const buffer = await response.arrayBuffer();
    return await parse_trajectory_data(buffer, filename);
  }
  // With a gzip content-encoding the body is treated as already decompressed;
  // a JSON content type is treated as plain text as well.
  if (content_encoding === `gzip` || content_type?.includes(`json`)) {
    const content = await response.text();
    filename = filename.replace(/\.gz$/, ``);
    return await parse_trajectory_data(content, filename);
  }
  // Remaining case: the body may be raw compressed bytes, decompress manually.
  // Relative specifier (like the other imports in this file) so the module
  // resolves from the published package; the `$lib` alias is only defined
  // inside a SvelteKit build.
  const { decompress_file } = await import(`../io/decompress`);
  const blob = await response.blob();
  const file = new File([blob], filename, {
    type: content_type || `application/octet-stream`,
  });
  const result = await decompress_file(file);
  return await parse_trajectory_data(result.content, result.filename);
}
|
|
72
|
+
// Atomic number -> element symbol lookup (keys 1..118), generated from the
// symbols listed in order of increasing atomic number, ten per row.
const ATOMIC_NUMBER_TO_SYMBOL = Object.fromEntries([
  `H`, `He`, `Li`, `Be`, `B`, `C`, `N`, `O`, `F`, `Ne`,
  `Na`, `Mg`, `Al`, `Si`, `P`, `S`, `Cl`, `Ar`, `K`, `Ca`,
  `Sc`, `Ti`, `V`, `Cr`, `Mn`, `Fe`, `Co`, `Ni`, `Cu`, `Zn`,
  `Ga`, `Ge`, `As`, `Se`, `Br`, `Kr`, `Rb`, `Sr`, `Y`, `Zr`,
  `Nb`, `Mo`, `Tc`, `Ru`, `Rh`, `Pd`, `Ag`, `Cd`, `In`, `Sn`,
  `Sb`, `Te`, `I`, `Xe`, `Cs`, `Ba`, `La`, `Ce`, `Pr`, `Nd`,
  `Pm`, `Sm`, `Eu`, `Gd`, `Tb`, `Dy`, `Ho`, `Er`, `Tm`, `Yb`,
  `Lu`, `Hf`, `Ta`, `W`, `Re`, `Os`, `Ir`, `Pt`, `Au`, `Hg`,
  `Tl`, `Pb`, `Bi`, `Po`, `At`, `Rn`, `Fr`, `Ra`, `Ac`, `Th`,
  `Pa`, `U`, `Np`, `Pu`, `Am`, `Cm`, `Bk`, `Cf`, `Es`, `Fm`,
  `Md`, `No`, `Lr`, `Rf`, `Db`, `Sg`, `Bh`, `Hs`, `Mt`, `Ds`,
  `Rg`, `Cn`, `Nh`, `Fl`, `Mc`, `Lv`, `Ts`, `Og`,
].map((symbol, idx) => [idx + 1, symbol]));
|
|
193
|
+
/**
 * Parse a torch-sim HDF5 trajectory into frames plus trajectory-level metadata.
 *
 * The buffer is written to h5wasm's virtual filesystem, opened read-only, and
 * the `data` group is read: `atomic_numbers` and `positions` are required;
 * `cell`, `potential_energy`, `kinetic_energy` and `pbc` are optional. Title
 * and program are taken from the `header` group's attributes when present.
 *
 * @param {ArrayBuffer} buffer - Raw contents of the HDF5 file.
 * @param {string} [filename] - Name used for the temporary virtual file
 *   (defaults to `temp.h5`).
 * @returns {Promise<{frames: object[], metadata: object}>} One frame per entry
 *   of `positions`, each with `structure`, `step` and per-frame `metadata`.
 * @throws {Error} Any failure is rethrown as
 *   `Failed to parse torch-sim HDF5 file: ...` with the original error as `cause`.
 */
export async function parse_torch_sim_hdf5(buffer, filename) {
  try {
    // Awaiting h5wasm.ready once both initialises the library and yields FS
    const { FS } = await h5wasm.ready;
    const temp_filename = filename || `temp.h5`;
    FS.writeFile(temp_filename, new Uint8Array(buffer));
    let f;
    try {
      // Opened inside the try so the temp file is removed even if opening fails
      f = new h5wasm.File(temp_filename, `r`);
      // Validate torch-sim format by checking for required group and datasets
      const data_group = f.get(`data`);
      if (!data_group) {
        throw new Error(`Invalid torch-sim HDF5 format: missing data group`);
      }
      const data_group_keys = data_group.keys();
      if (
        !data_group_keys.includes(`atomic_numbers`) ||
        !data_group_keys.includes(`positions`)
      ) {
        throw new Error(`Invalid torch-sim HDF5 format: missing required datasets`);
      }
      // Best-effort read of an optional dataset: a missing dataset or any read /
      // conversion error yields undefined instead of aborting the parse.
      const read_optional = (name, convert) => {
        try {
          const dataset = data_group.get(name);
          return dataset ? convert(dataset.to_array()) : undefined;
        } catch {
          return undefined;
        }
      };
      // Atomic numbers -> element symbols (unknown numbers become `X`)
      const atomic_numbers_dataset = data_group.get(`atomic_numbers`);
      if (!atomic_numbers_dataset) {
        throw new Error(`Missing atomic_numbers dataset`);
      }
      // Only the first row of the atomic_numbers array is used
      const atom_numbers = atomic_numbers_dataset.to_array()[0];
      const elements = atom_numbers.map((num) => ATOMIC_NUMBER_TO_SYMBOL[num] || `X`);
      const positions_dataset = data_group.get(`positions`);
      if (!positions_dataset) {
        throw new Error(`Missing positions dataset`);
      }
      const positions = positions_dataset.to_array();
      const cells = read_optional(`cell`, (array) => array);
      // Energies: the first entry of each row is taken as the frame's value
      const potential_energies = read_optional(
        `potential_energy`,
        (array) => array.map((row) => row[0]),
      );
      const kinetic_energies = read_optional(
        `kinetic_energy`,
        (array) => array.map((row) => row[0]),
      );
      // Periodic boundary conditions default to fully periodic
      const pbc = read_optional(
        `pbc`,
        (array) => array.slice(0, 3).map((val) => val !== 0),
      ) ?? [true, true, true];
      const frames = positions.map((frame_positions, frame_idx) => {
        // Per-frame lattice; identity matrix when no cell is stored for this frame
        const lattice_matrix = cells?.[frame_idx]
          ? cells[frame_idx].map((row) => row.slice())
          : [[1, 0, 0], [0, 1, 0], [0, 0, 1]];
        const lattice_params = math.calc_lattice_params(lattice_matrix);
        const lattice = { matrix: lattice_matrix, ...lattice_params, pbc };
        const inv_matrix = get_inverse_matrix(lattice_matrix);
        const sites = frame_positions.map((xyz_pos, atom_idx) => ({
          species: [{ element: elements[atom_idx], occu: 1, oxidation_state: 0 }],
          // Fractional coordinates: inverse lattice matrix applied to the position
          abc: math.mat3x3_vec3_multiply(inv_matrix, xyz_pos),
          xyz: xyz_pos.slice(),
          label: `${elements[atom_idx]}${atom_idx + 1}`,
          properties: {},
        }));
        const metadata = {
          volume: lattice.volume,
          ...(potential_energies &&
            frame_idx < potential_energies.length && {
            energy: potential_energies[frame_idx],
          }),
          ...(kinetic_energies &&
            frame_idx < kinetic_energies.length && {
            kinetic_energy: kinetic_energies[frame_idx],
          }),
        };
        return { structure: { sites, lattice }, step: frame_idx, metadata };
      });
      // Header attributes are optional; keep the defaults if anything goes wrong
      let title = `TorchSim Trajectory`;
      let program = `Unknown`;
      try {
        const header_group = f.get(`header`);
        title = header_group?.attrs?.title?.toString() ?? title;
        program = header_group?.attrs?.program?.toString() ?? program;
      } catch {
        // Header might not be available
      }
      // Count atoms per element symbol
      const element_counts = {};
      for (const element of elements) {
        element_counts[element] = (element_counts[element] || 0) + 1;
      }
      return {
        frames,
        metadata: {
          title,
          program,
          num_atoms: elements.length,
          num_frames: frames.length,
          periodic_boundary_conditions: pbc,
          ...(potential_energies && { has_energy: true }),
          ...(kinetic_energies && { has_kinetic_energy: true }),
          element_counts,
        },
      };
    } finally {
      try {
        f?.close();
      } finally {
        // Always remove the temporary file from the virtual filesystem
        try {
          FS.unlink(temp_filename);
        } catch {
          // Ignore cleanup errors
        }
      }
    }
  } catch (error) {
    throw new Error(`Failed to parse torch-sim HDF5 file: ${error}`, { cause: error });
  }
}
|
|
357
|
+
/**
 * Decide whether a file looks like an HDF5 (torch-sim) trajectory.
 *
 * A filename, when given, must end in `.h5` or `.hdf5`. With no content (or an
 * empty ArrayBuffer) the answer is based on the extension alone. Otherwise the
 * content must be an ArrayBuffer that starts with the 8-byte HDF5 signature;
 * string content is never accepted.
 *
 * @param {ArrayBuffer|string|null|undefined} content - File content, if available.
 * @param {string} [filename] - File name, if available.
 * @returns {boolean} True when the file is considered HDF5.
 */
export function is_torch_sim_hdf5(content, filename) {
  const lower_name = filename ? filename.toLowerCase() : ``;
  const has_hdf5_extension = Boolean(filename) &&
    [`.h5`, `.hdf5`].some((suffix) => lower_name.endsWith(suffix));
  // A filename with any other extension rules the file out immediately
  if (filename && !has_hdf5_extension) return false;

  const is_buffer = content instanceof ArrayBuffer;
  // Nothing to inspect: fall back to the extension check
  if (!content || (is_buffer && content.byteLength === 0)) {
    return has_hdf5_extension;
  }
  // Text content, non-ArrayBuffer content and buffers too short for the signature
  if (!is_buffer || content.byteLength < 8) return false;

  // HDF5 signature: \211HDF\r\n\032\n
  const hdf5_signature = [0x89, 0x48, 0x44, 0x46, 0x0d, 0x0a, 0x1a, 0x0a];
  const leading_bytes = new Uint8Array(content, 0, 8);
  return hdf5_signature.every((byte, idx) => leading_bytes[idx] === byte);
}
|
|
384
|
+
/**
 * Parse a VASP XDATCAR file into trajectory frames.
 *
 * Expected layout: title line, scale factor, three lattice vectors, element
 * names, element counts, then repeated `Direct configuration=` blocks with one
 * fractional-coordinate line per atom (optionally followed by an element symbol).
 * The lattice from the header is used for every frame.
 *
 * @param {string} content - Full text of the XDATCAR file.
 * @returns {{frames: object[], metadata: object}} Frames with `structure`,
 *   `step` and `metadata.volume`, plus file-level metadata.
 * @throws {Error} If the file is shorter than 10 lines, the header is
 *   malformed, or no complete configuration is found.
 */
export function parse_vasp_xdatcar(content) {
  const lines = content.trim().split(/\r?\n/);
  if (lines.length < 10) {
    throw new Error(`XDATCAR file too short`);
  }
  let line_idx = 0;
  // Header: title and scale factor
  const title = lines[line_idx++].trim();
  const scale_factor = parseFloat(lines[line_idx++]);
  if (isNaN(scale_factor)) {
    throw new Error(`Invalid scale factor in XDATCAR`);
  }
  // Three lattice vectors, each scaled by the scale factor
  const lattice_vectors = [];
  for (let i = 0; i < 3; i++) {
    const coords = lines[line_idx++].trim().split(/\s+/).map(Number);
    if (coords.length !== 3 || coords.some(isNaN)) {
      // line_idx was already advanced, so it is the 1-based number of this line
      throw new Error(`Invalid lattice vector at line ${line_idx}`);
    }
    lattice_vectors.push(coords.map((x) => x * scale_factor));
  }
  const lattice_params = math.calc_lattice_params(lattice_vectors);
  const lattice = { matrix: lattice_vectors, ...lattice_params, pbc: [true, true, true] };
  // The transposed lattice is the same for every atom of every frame, so it is
  // computed once here instead of inside the per-atom loop.
  const lattice_transposed = math.transpose_matrix(lattice_vectors);
  // Element names and per-element atom counts
  const element_line = lines[line_idx++].trim().split(/\s+/);
  const count_line = lines[line_idx++].trim().split(/\s+/).map(Number);
  if (element_line.length !== count_line.length || count_line.some(isNaN)) {
    throw new Error(`Element names and counts don't match`);
  }
  // Expand to one element symbol per site, in file order
  const elements = [];
  for (let i = 0; i < element_line.length; i++) {
    for (let j = 0; j < count_line[i]; j++) {
      elements.push(element_line[i]);
    }
  }
  const total_atoms = count_line.reduce((sum, count) => sum + count, 0);
  const frames = [];
  while (line_idx < lines.length) {
    // Skip ahead to the next configuration header
    const config_line = lines[line_idx++];
    if (!config_line || !config_line.includes(`Direct configuration=`)) {
      continue;
    }
    const config_match = config_line.match(/configuration=\s*(\d+)/);
    const step = config_match ? parseInt(config_match[1], 10) : frames.length + 1;
    const sites = [];
    for (let atom_idx = 0; atom_idx < total_atoms; atom_idx++) {
      if (line_idx >= lines.length) break;
      const pos_line = lines[line_idx++].trim();
      const parts = pos_line.split(/\s+/);
      // Two accepted line formats: "x y z" and "x y z Element". A non-numeric
      // fourth token is taken as the element; otherwise the header order is used.
      const has_element_column = parts.length >= 4 && isNaN(Number(parts[3]));
      const element = has_element_column ? parts[3] : elements[atom_idx];
      const coords = parts.slice(0, 3).map(Number);
      if (coords.length < 3 || coords.some(isNaN)) {
        console.warn(`Invalid coordinate line: ${pos_line}`);
        continue;
      }
      const abc = [coords[0], coords[1], coords[2]];
      // Fractional -> Cartesian
      const xyz = math.mat3x3_vec3_multiply(lattice_transposed, abc);
      sites.push({
        species: [{ element, occu: 1, oxidation_state: 0 }],
        abc,
        xyz,
        label: `${element}${atom_idx + 1}`,
        properties: {},
      });
    }
    // Only complete configurations become frames
    if (sites.length === total_atoms) {
      frames.push({
        structure: { sites, lattice },
        step,
        metadata: { volume: lattice.volume },
      });
    }
  }
  if (frames.length === 0) {
    throw new Error(`No valid configurations found in XDATCAR`);
  }
  return {
    frames,
    metadata: {
      title,
      source_format: `vasp_xdatcar`,
      frame_count: frames.length,
      total_atoms,
      elements: element_line,
      element_counts: count_line,
    },
  };
}
|
|
492
|
+
/**
 * Detect whether a file is in VASP XDATCAR format.
 *
 * A filename whose last path segment starts with `xdatcar` (case-insensitive)
 * is accepted outright. Otherwise the text content must have at least 10 lines,
 * a numeric second line (scale factor), three numeric triples on lines 3-5
 * (lattice vectors) and at least one `Direct configuration=` line.
 *
 * @param {string} content - File content as text.
 * @param {string} [filename] - File name or path, if available.
 * @returns {boolean} True when the file looks like an XDATCAR.
 */
export function is_vasp_xdatcar(content, filename) {
  if (filename) {
    const basename = filename.toLowerCase().split(`/`).pop() || ``;
    // Covers the exact name "xdatcar" as well as variants such as "xdatcar_run1"
    if (basename.startsWith(`xdatcar`)) return true;
  }
  // Binary or missing content cannot be inspected as text; answer "no"
  // instead of throwing on .trim().
  if (typeof content !== `string`) return false;

  const lines = content.trim().split(/\r?\n/);
  if (lines.length < 10) return false;

  // Scale factor on the second line
  if (isNaN(parseFloat(lines[1]))) return false;
  // Lattice vectors: three numeric tokens on each of the next three lines
  const has_lattice_vectors = lines.slice(2, 5).every((line) => {
    const coords = line.trim().split(/\s+/);
    return coords.length === 3 && coords.every((coord) => !isNaN(parseFloat(coord)));
  });
  if (!has_lattice_vectors) return false;
  // "Direct configuration=" marks each configuration block
  return lines.some((line) => line.includes(`Direct configuration=`));
}
|
|
527
|
+
// Parse multi-frame XYZ format for trajectories
|
|
528
|
+
export function parse_xyz_trajectory(content) {
|
|
529
|
+
const lines = content.trim().split(/\r?\n/);
|
|
530
|
+
let line_idx = 0;
|
|
531
|
+
const frames = [];
|
|
532
|
+
while (line_idx < lines.length) {
|
|
533
|
+
// Skip empty lines
|
|
534
|
+
if (!lines[line_idx] || lines[line_idx].trim() === ``) {
|
|
535
|
+
line_idx++;
|
|
536
|
+
continue;
|
|
537
|
+
}
|
|
538
|
+
// Parse number of atoms (line 1 of each frame)
|
|
539
|
+
const num_atoms_line = lines[line_idx]?.trim();
|
|
540
|
+
if (!num_atoms_line)
|
|
541
|
+
break;
|
|
542
|
+
const num_atoms = parseInt(num_atoms_line, 10);
|
|
543
|
+
if (isNaN(num_atoms) || num_atoms <= 0) {
|
|
544
|
+
line_idx++;
|
|
545
|
+
continue;
|
|
546
|
+
}
|
|
547
|
+
// Check if we have enough lines for this frame
|
|
548
|
+
if (line_idx + num_atoms + 1 >= lines.length)
|
|
549
|
+
break;
|
|
550
|
+
// Parse comment line (line 2 of each frame) - may contain metadata
|
|
551
|
+
line_idx++;
|
|
552
|
+
const comment_line = lines[line_idx] || ``;
|
|
553
|
+
const frame_metadata = {};
|
|
554
|
+
// Try to extract step number from comment
|
|
555
|
+
const step_match = comment_line.match(/step\s*[=:]?\s*(\d+)/i);
|
|
556
|
+
const frame_match = comment_line.match(/frame\s*[=:]?\s*(\d+)/i);
|
|
557
|
+
const ionic_step_match = comment_line.match(/ionic_step\s*[=:]?\s*(\d+)/i);
|
|
558
|
+
const step = step_match
|
|
559
|
+
? parseInt(step_match[1])
|
|
560
|
+
: frame_match
|
|
561
|
+
? parseInt(frame_match[1])
|
|
562
|
+
: ionic_step_match
|
|
563
|
+
? parseInt(ionic_step_match[1])
|
|
564
|
+
: frames.length;
|
|
565
|
+
// Extract various properties from extended XYZ comment line
|
|
566
|
+
// Map canonical property names to possible alternative names
|
|
567
|
+
const property_aliases = {
|
|
568
|
+
energy: [`energy`, `E`, `total_energy`, `etot`, `total_e`],
|
|
569
|
+
energy_per_atom: [`energy_per_atom`, `e_per_atom`, `energy/atom`, `epa`],
|
|
570
|
+
volume: [`volume`, `vol`, `V`, `cell_volume`],
|
|
571
|
+
pressure: [`pressure`, `P`, `press`],
|
|
572
|
+
temperature: [`temperature`, `temp`, `T`, `kelvin`],
|
|
573
|
+
bandgap: [`bandgap`, `E_gap`, `gap`, `band_gap`, `egap`, `bg`],
|
|
574
|
+
force_max: [`max_force`, `force_max`, `fmax`, `maximum_force`],
|
|
575
|
+
stress_max: [`max_stress`, `stress_max`, `maximum_stress`],
|
|
576
|
+
stress_frobenius: [`stress_frobenius`, `frobenius_stress`, `stress_frob`],
|
|
577
|
+
};
|
|
578
|
+
// First, try to extract properties from within the Properties= string (extended XYZ format)
|
|
579
|
+
const properties_match = comment_line.match(/Properties\s*=\s*"?([^"]*)"?/i);
|
|
580
|
+
if (properties_match) {
|
|
581
|
+
const properties_string = properties_match[1];
|
|
582
|
+
// Split the Properties string by spaces and look for property=value pairs
|
|
583
|
+
const property_parts = properties_string.split(/\s+/);
|
|
584
|
+
for (const part of property_parts) {
|
|
585
|
+
if (part.includes(`=`)) {
|
|
586
|
+
const [key, value] = part.split(`=`, 2);
|
|
587
|
+
const parsed_value = parseFloat(value);
|
|
588
|
+
if (!isNaN(parsed_value)) {
|
|
589
|
+
// Check if this key matches any of our canonical properties
|
|
590
|
+
for (const [canonical_name, aliases] of Object.entries(property_aliases)) {
|
|
591
|
+
if (aliases.some((alias) => key.toLowerCase() === alias.toLowerCase())) {
|
|
592
|
+
frame_metadata[canonical_name] = parsed_value;
|
|
593
|
+
break;
|
|
594
|
+
}
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
599
|
+
}
|
|
600
|
+
// Then, extract other properties that might be outside the Properties string
|
|
601
|
+
for (const [canonical_name, aliases] of Object.entries(property_aliases)) {
|
|
602
|
+
// Skip if we already found this property in the Properties string
|
|
603
|
+
if (canonical_name in frame_metadata)
|
|
604
|
+
continue;
|
|
605
|
+
let found_value;
|
|
606
|
+
// Try each alias until we find a match
|
|
607
|
+
for (const alias of aliases) {
|
|
608
|
+
const regex = new RegExp(`${alias}\\s*[=:]?\\s*([-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?)`, `i`);
|
|
609
|
+
const match = comment_line.match(regex);
|
|
610
|
+
if (match) {
|
|
611
|
+
found_value = parseFloat(match[1]);
|
|
612
|
+
break; // Stop at first match to avoid conflicts
|
|
613
|
+
}
|
|
614
|
+
}
|
|
615
|
+
if (found_value !== undefined && !isNaN(found_value)) {
|
|
616
|
+
frame_metadata[canonical_name] = found_value;
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
// Parse stress tensor if present (for extended XYZ format)
|
|
620
|
+
const stress_match = comment_line.match(/stress\s*=\s*"([^"]+)"/i);
|
|
621
|
+
if (stress_match) {
|
|
622
|
+
const stress_values = stress_match[1].split(/\s+/).map(Number);
|
|
623
|
+
if (stress_values.length === 9) {
|
|
624
|
+
// Convert flat array to 3x3 stress tensor using helper function
|
|
625
|
+
const stress_tensor = math.vec9_to_mat3x3(stress_values);
|
|
626
|
+
// Store the full stress tensor
|
|
627
|
+
frame_metadata.stress = stress_tensor;
|
|
628
|
+
// Convert to Voigt notation for stress_max calculation
|
|
629
|
+
const [s11, s22, s33, s23, s13, s12] = math.to_voigt(stress_tensor);
|
|
630
|
+
// Calculate von Mises stress (stress_max equivalent)
|
|
631
|
+
frame_metadata.stress_max = Math.sqrt(0.5 * ((s11 - s22) ** 2 + (s22 - s33) ** 2 + (s33 - s11) ** 2) +
|
|
632
|
+
3 * (s12 ** 2 + s13 ** 2 + s23 ** 2));
|
|
633
|
+
// Calculate Frobenius norm of stress tensor
|
|
634
|
+
frame_metadata.stress_frobenius = Math.sqrt(stress_values.reduce((sum, val) => sum + val ** 2, 0));
|
|
635
|
+
// Calculate pressure (negative trace/3)
|
|
636
|
+
frame_metadata.pressure = -(s11 + s22 + s33) / 3;
|
|
637
|
+
}
|
|
638
|
+
}
|
|
639
|
+
// Parse lattice matrix if present (for extended XYZ format)
|
|
640
|
+
const lattice_match = comment_line.match(/Lattice\s*=\s*"([^"]+)"/i);
|
|
641
|
+
let lattice = null;
|
|
642
|
+
if (lattice_match) {
|
|
643
|
+
const lattice_values = lattice_match[1].split(/\s+/).map(Number);
|
|
644
|
+
if (lattice_values.length === 9) {
|
|
645
|
+
// Convert flat array to 3x3 matrix
|
|
646
|
+
const lattice_matrix = [
|
|
647
|
+
[lattice_values[0], lattice_values[1], lattice_values[2]],
|
|
648
|
+
[lattice_values[3], lattice_values[4], lattice_values[5]],
|
|
649
|
+
[lattice_values[6], lattice_values[7], lattice_values[8]],
|
|
650
|
+
];
|
|
651
|
+
const lattice_params = math.calc_lattice_params(lattice_matrix);
|
|
652
|
+
lattice = { matrix: lattice_matrix, ...lattice_params, pbc: [true, true, true] };
|
|
653
|
+
// Add calculated volume to metadata if not already present
|
|
654
|
+
if (!frame_metadata.volume)
|
|
655
|
+
frame_metadata.volume = lattice.volume;
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
// Parse atomic coordinates (lines 3 to N+2)
|
|
659
|
+
const sites = [];
|
|
660
|
+
line_idx++;
|
|
661
|
+
// Pre-compute inverse matrix once per frame for efficiency
|
|
662
|
+
const inv_matrix = lattice && get_inverse_matrix(lattice.matrix);
|
|
663
|
+
for (let atom_idx = 0; atom_idx < num_atoms; atom_idx++) {
|
|
664
|
+
if (line_idx >= lines.length) {
|
|
665
|
+
throw new Error(`Incomplete XYZ frame: missing atomic coordinates`);
|
|
666
|
+
}
|
|
667
|
+
const coord_line = lines[line_idx].trim();
|
|
668
|
+
if (!coord_line) {
|
|
669
|
+
throw new Error(`Empty coordinate line in XYZ frame`);
|
|
670
|
+
}
|
|
671
|
+
const parts = coord_line.split(/\s+/);
|
|
672
|
+
if (parts.length < 4) {
|
|
673
|
+
throw new Error(`Invalid coordinate line in XYZ frame: ${coord_line}`);
|
|
674
|
+
}
|
|
675
|
+
const element = parts[0];
|
|
676
|
+
const x = parseFloat(parts[1]);
|
|
677
|
+
const y = parseFloat(parts[2]);
|
|
678
|
+
const z = parseFloat(parts[3]);
|
|
679
|
+
if (isNaN(x) || isNaN(y) || isNaN(z)) {
|
|
680
|
+
throw new Error(`Invalid coordinates in XYZ frame: ${coord_line}`);
|
|
681
|
+
}
|
|
682
|
+
// XYZ coordinates are typically in Angstroms (Cartesian)
|
|
683
|
+
const xyz = [x, y, z];
|
|
684
|
+
// Calculate fractional coordinates if lattice info is available
|
|
685
|
+
const abc = inv_matrix
|
|
686
|
+
? math.mat3x3_vec3_multiply(inv_matrix, xyz)
|
|
687
|
+
: [0, 0, 0];
|
|
688
|
+
sites.push({
|
|
689
|
+
species: [{ element, occu: 1, oxidation_state: 0 }],
|
|
690
|
+
abc,
|
|
691
|
+
xyz,
|
|
692
|
+
label: `${element}${atom_idx + 1}`,
|
|
693
|
+
properties: {},
|
|
694
|
+
});
|
|
695
|
+
line_idx++;
|
|
696
|
+
}
|
|
697
|
+
// Create structure for this frame
|
|
698
|
+
// Include lattice information if parsed from extended XYZ format
|
|
699
|
+
const structure = { sites, ...(lattice && { lattice }) };
|
|
700
|
+
frames.push({ structure, step, metadata: frame_metadata });
|
|
701
|
+
}
|
|
702
|
+
if (frames.length === 0) {
|
|
703
|
+
throw new Error(`No valid frames found in XYZ trajectory`);
|
|
704
|
+
}
|
|
705
|
+
return {
|
|
706
|
+
frames,
|
|
707
|
+
metadata: {
|
|
708
|
+
source_format: `xyz_trajectory`,
|
|
709
|
+
frame_count: frames.length,
|
|
710
|
+
total_atoms: frames[0]?.structure.sites.length || 0,
|
|
711
|
+
has_lattice_info: frames.some((f) => `lattice` in f.structure),
|
|
712
|
+
},
|
|
713
|
+
};
|
|
714
|
+
}
|
|
715
|
+
/**
 * Detect if content is multi-frame XYZ trajectory format.
 *
 * Only file names ending in `.xyz` or `.extxyz` (case-insensitive) are considered.
 * The text is scanned for frames laid out as
 *   <atom count> / <comment line> / <atom count coordinate lines>
 * For each candidate frame at most the first 5 coordinate lines are validated
 * (a non-numeric label of up to 3 characters followed by three numeric columns), and
 * scanning stops once 10 frames have been counted.
 *
 * @param {string} content - Raw text content of the file.
 * @param {string} [filename] - File name or path; without it the function returns false.
 * @returns {boolean} true when at least 2 valid frames were found.
 */
export function is_xyz_trajectory(content, filename) {
    if (!filename) {
        return false;
    }
    const basename = filename.toLowerCase().split(`/`).pop() || ``;
    if (!basename.endsWith(`.xyz`) && !basename.endsWith(`.extxyz`)) {
        return false;
    }

    const max_frames_to_count = 10;
    const max_atoms_to_check = 5;
    const lines = content.trim().split(/\r?\n/);
    let frame_count = 0;
    let line_idx = 0;

    while (line_idx < lines.length && frame_count < max_frames_to_count) {
        // Blank lines and anything else that is not a positive integer cannot
        // start a frame, so just move on to the next line.
        const num_atoms = Number.parseInt(lines[line_idx].trim(), 10);
        if (Number.isNaN(num_atoms) || num_atoms <= 0) {
            line_idx++;
            continue;
        }

        // Layout relative to the count line: +1 is the comment, +2 the first atom.
        const first_atom_idx = line_idx + 2;
        const last_atom_idx = line_idx + 1 + num_atoms;
        // Stop if the file is too short to hold the whole frame.
        if (last_atom_idx >= lines.length) {
            break;
        }

        const atoms_to_check = Math.min(num_atoms, max_atoms_to_check);
        let valid_coordinates = 0;
        for (let offset = 0; offset < atoms_to_check; offset++) {
            const parts = lines[first_atom_idx + offset].trim().split(/\s+/);
            if (parts.length < 4) {
                continue;
            }
            const first_token = parts[0];
            const is_element = Number.isNaN(Number.parseInt(first_token, 10)) &&
                first_token.length <= 3;
            const are_coords = parts
                .slice(1, 4)
                .every((coord) => !Number.isNaN(Number.parseFloat(coord)));
            if (is_element && are_coords) {
                valid_coordinates++;
            }
        }

        if (valid_coordinates >= Math.min(num_atoms, 3)) {
            frame_count++;
            // Resume AFTER the last atom line of this frame. Resuming on the last
            // atom line would let an unvalidated line with a leading number be
            // mistaken for the next frame's atom count.
            line_idx = last_atom_idx + 1;
        }
        else {
            // Not a frame: continue with the line after the ones just examined.
            line_idx = first_atom_idx + atoms_to_check;
        }
    }

    // A trajectory needs at least 2 valid frames
    return frame_count >= 2;
}
|
|
778
|
+
/**
 * Parse pymatgen Trajectory format.
 *
 * Reads `species`, `coords` ([frame][atom][3], treated as fractional coordinates),
 * `lattice` and `frame_properties` from the serialized object and produces one frame
 * per entry of `coords`. `lattice` may be a single 3x3 matrix shared by all frames or
 * an array with one 3x3 matrix per frame.
 *
 * Per-frame metadata is a shallow copy of `frame_properties[frame_idx]`. When
 * `forces` / `stress` are objects carrying an array under `data`, that array replaces
 * the wrapper object and `force_max`, `force_rms`, `stress_max` (von Mises) and
 * `pressure` (negative trace / 3) are derived from it.
 *
 * @param {object} obj_data - Parsed pymatgen Trajectory dict.
 * @param {string} [filename] - Name to record in the metadata; falls back to `obj_data.filename`.
 * @returns {{frames: object[], metadata: object}} Trajectory with frames and summary metadata.
 */
export function parse_pymatgen_trajectory(obj_data, filename) {
    const species = obj_data.species;
    const coords = obj_data.coords; // [frame][atom][xyz]
    const raw_lattice = obj_data.lattice; // 3x3 matrix, or one 3x3 matrix per frame
    const frame_properties = obj_data.frame_properties;

    // A per-frame lattice has one more level of nesting than a single 3x3 matrix.
    const has_per_frame_lattice = Array.isArray(raw_lattice?.[0]?.[0]);

    // Everything derived from one lattice matrix: computed once per distinct matrix
    // rather than once per site.
    const describe_cell = (matrix) => ({
        matrix,
        params: math.calc_lattice_params(matrix),
        transposed: math.transpose_matrix(matrix),
    });
    const constant_cell = has_per_frame_lattice ? null : describe_cell(raw_lattice);

    const frames = coords.map((frame_coords, frame_idx) => {
        // If fewer lattices than frames are given, keep using the last one.
        const cell = constant_cell ??
            describe_cell(raw_lattice[Math.min(frame_idx, raw_lattice.length - 1)]);

        // Convert coordinates and species to sites
        const sites = frame_coords.map((frac, site_idx) => {
            const abc = [frac[0], frac[1], frac[2]]; // pymatgen uses fractional coordinates
            const element = species[site_idx].element;
            return {
                species: [{ element, occu: 1, oxidation_state: 0 }],
                abc,
                xyz: math.mat3x3_vec3_multiply(cell.transposed, abc),
                label: element,
                properties: {},
            };
        });

        // Extract frame metadata
        const frame_props = frame_properties?.[frame_idx] || {};
        const metadata = { ...frame_props };

        // Process forces if available
        const forces = frame_props.forces && typeof frame_props.forces === `object`
            ? frame_props.forces.data
            : undefined;
        if (forces && Array.isArray(forces)) {
            metadata.forces = forces;
            if (forces.length > 0) {
                const force_magnitudes = forces.map((force) => Math.sqrt(force[0] ** 2 + force[1] ** 2 + force[2] ** 2));
                // reduce instead of Math.max(...array): spreading a very large array
                // into call arguments can exceed the engine's argument limit.
                metadata.force_max = force_magnitudes.reduce((max, mag) => Math.max(max, mag), -Infinity);
                metadata.force_rms = Math.sqrt(force_magnitudes.reduce((sum, mag) => sum + mag ** 2, 0) /
                    force_magnitudes.length);
            }
        }

        // Process stress if available
        const stress = frame_props.stress && typeof frame_props.stress === `object`
            ? frame_props.stress.data
            : undefined;
        if (stress && Array.isArray(stress)) {
            metadata.stress = stress;
            if (stress.length === 3 && stress[0].length === 3) {
                const s11 = stress[0][0];
                const s22 = stress[1][1];
                const s33 = stress[2][2];
                const s12 = stress[0][1];
                const s13 = stress[0][2];
                const s23 = stress[1][2];
                // Max stress (von Mises equivalent)
                metadata.stress_max = Math.sqrt(0.5 * ((s11 - s22) ** 2 + (s22 - s33) ** 2 + (s33 - s11) ** 2) +
                    3 * (s12 ** 2 + s13 ** 2 + s23 ** 2));
                // Pressure (negative trace/3)
                metadata.pressure = -(s11 + s22 + s33) / 3;
            }
        }

        return {
            structure: {
                sites,
                charge: obj_data.charge || 0,
                // Fresh lattice object per frame so frames do not share mutable state.
                lattice: { matrix: cell.matrix, ...cell.params, pbc: [true, true, true] },
            },
            step: frame_idx,
            metadata,
        };
    });

    return {
        frames,
        metadata: {
            filename: filename || obj_data.filename,
            source_format: `pymatgen_trajectory`,
            species_list: species.map((s) => s.element),
            constant_lattice: obj_data.constant_lattice,
            frame_count: frames.length,
        },
    };
}
|
|
854
|
+
/**
 * Parse trajectory from various common formats.
 *
 * Dispatch order:
 *  1. `ArrayBuffer`: torch-sim HDF5, otherwise an error.
 *  2. `string`: multi-frame XYZ, then VASP XDATCAR, then single-frame XYZ (only for
 *     `.xyz` / `.extxyz` file names; a failure is logged and parsing falls through),
 *     then JSON.
 *  3. Array: list of frames, each either `{ structure, step?, metadata? }` or a bare
 *     structure with `sites`.
 *  4. Object: pymatgen `Trajectory` dict, object with a `frames` array, or a single
 *     structure with `sites`.
 *
 * @param {ArrayBuffer|string|object|Array} data - Raw file content or already parsed data.
 * @param {string} [filename] - File name used for format detection and recorded in metadata.
 * @returns {Promise<{frames: object[], metadata: object}>} The parsed trajectory.
 * @throws {Error} When the input matches none of the supported formats.
 */
export async function parse_trajectory_data(data, filename) {
    // Handle binary data (HDF5 files)
    if (data instanceof ArrayBuffer) {
        if (is_torch_sim_hdf5(data, filename)) {
            return await parse_torch_sim_hdf5(data, filename);
        }
        // Not a recognized HDF5 format
        throw new Error(`Unsupported binary file format`);
    }

    // `parsed` is the object/array form of the input; the parameter itself is left untouched.
    let parsed = data;

    // Handle string data (raw file content)
    if (typeof data === `string`) {
        const content = data.trim();
        // Try multi-frame XYZ format first (before single XYZ)
        if (is_xyz_trajectory(content, filename)) {
            return parse_xyz_trajectory(content);
        }
        // Try VASP XDATCAR format
        if (is_vasp_xdatcar(content, filename)) {
            return parse_vasp_xdatcar(content);
        }
        // Try single-frame XYZ (convert to trajectory format)
        const lower_filename = filename?.toLowerCase();
        if (lower_filename?.endsWith(`.xyz`) || lower_filename?.endsWith(`.extxyz`)) {
            try {
                const single_structure = parse_xyz(content);
                if (single_structure) {
                    return {
                        frames: [{ structure: single_structure, step: 0, metadata: {} }],
                        metadata: {
                            filename,
                            source_format: `single_xyz`,
                            frame_count: 1,
                        },
                    };
                }
            }
            catch (error) {
                // Best effort: fall through to JSON parsing below.
                console.warn(`Failed to parse as single XYZ:`, error);
            }
        }
        // Try JSON parsing for other formats
        try {
            parsed = JSON.parse(content);
        }
        catch (error) {
            throw new Error(`Content is not valid JSON, XYZ trajectory, single XYZ, VASP XDATCAR, or HDF5 format`, { cause: error });
        }
    }

    if (!parsed || typeof parsed !== `object`) {
        throw new Error(`Invalid trajectory data: must be an object or array`);
    }

    // Handle array format (list of frames)
    if (Array.isArray(parsed)) {
        const frames = parsed.map((frame_data, idx) => {
            if (typeof frame_data !== `object` || frame_data === null) {
                throw new Error(`Invalid frame data at index ${idx}`);
            }
            // Try different possible structure keys
            let structure;
            if (frame_data.structure && typeof frame_data.structure === `object`) {
                structure = frame_data.structure;
            }
            else if (frame_data.sites) {
                // Frame data is itself a structure
                structure = frame_data;
            }
            else {
                throw new Error(`No structure found in frame ${idx}`);
            }
            return {
                structure,
                step: typeof frame_data.step === `number` ? frame_data.step : idx,
                // Without an explicit metadata object the whole frame doubles as metadata.
                metadata: frame_data.metadata || frame_data,
            };
        });
        return {
            frames,
            metadata: {
                filename,
                source_format: `array`,
                frame_count: frames.length,
            },
        };
    }

    // Handle object format
    // Check if it's a pymatgen Trajectory format
    const is_pymatgen_trajectory = parsed[`@class`] === `Trajectory` &&
        Array.isArray(parsed.species) &&
        Array.isArray(parsed.coords) &&
        Boolean(parsed.lattice) &&
        Array.isArray(parsed.frame_properties);
    if (is_pymatgen_trajectory) {
        return parse_pymatgen_trajectory(parsed, filename);
    }

    // Check if it has a frames property
    if (parsed.frames && Array.isArray(parsed.frames)) {
        return {
            frames: parsed.frames,
            metadata: {
                ...parsed.metadata,
                filename,
                source_format: `object_with_frames`,
                // Report the actual number of frames, like every other branch does.
                frame_count: parsed.frames.length,
            },
        };
    }

    // Check if it's a single structure
    if (parsed.sites) {
        return {
            frames: [{ structure: parsed, step: 0, metadata: {} }],
            metadata: {
                filename,
                source_format: `single_structure`,
                frame_count: 1,
            },
        };
    }

    throw new Error(`Unrecognized trajectory format: expected array of frames, object with frames property, single structure, or VASP XDATCAR format`);
}
|
|
984
|
+
/**
 * Helper function to detect unsupported file formats and provide helpful messages.
 *
 * Recognizes ASE `.traj`, LAMMPS `.dump` / `.lammpstrj`, NetCDF `.nc` / `.netcdf` and
 * DCD `.dcd` files by extension (case-insensitive) and returns an HTML snippet with
 * Python conversion examples. Any other file whose content is flagged by `is_binary`
 * gets a generic binary-file message.
 *
 * Suggested output file names are built by swapping only the trailing extension,
 * regardless of its letter case, so the example never writes back onto the input file.
 *
 * @param {string} filename - Name of the file the user tried to load.
 * @param {string} content - File content, checked for binary data.
 * @returns {string|null} HTML message, or null when no unsupported format was detected.
 */
export function get_unsupported_format_message(filename, content) {
    const lower_filename = filename.toLowerCase();

    // Check for binary ASE trajectory files
    if (lower_filename.endsWith(`.traj`)) {
        const xyz_name = filename.replace(/\.traj$/i, `.xyz`);
        const json_name = filename.replace(/\.traj$/i, `.json`);
        return create_format_error(`ASE Binary Trajectory`, filename, [
            {
                tool: `ASE`,
                code: `from ase.io import read, write
# Read ASE trajectory
traj = read('${filename}', index=':')
# Convert to multi-frame XYZ
write('${xyz_name}', traj)`,
            },
            {
                tool: `pymatgen`,
                code: `from pymatgen.io.ase import AseAtomsAdaptor
from ase.io import read
import json

# Read ASE trajectory and convert to pymatgen
traj = read('${filename}', index=':')
structures = [AseAtomsAdaptor.get_structure(atoms) for atoms in traj]

# Save as JSON for matterviz
trajectory_data = {
    "frames": [{"structure": struct.as_dict(), "step": i} for i, struct in enumerate(structures)]
}
with open('${json_name}', 'w') as file:
    json.dump(trajectory_data, file)`,
            },
        ]);
    }

    // Check for LAMMPS trajectory files
    if (lower_filename.endsWith(`.dump`) ||
        lower_filename.endsWith(`.lammpstrj`)) {
        return create_format_error(`LAMMPS Trajectory`, filename, [
            {
                tool: `pymatgen`,
                code: `from pymatgen.io.lammps.data import LammpsData
# Convert LAMMPS trajectory to supported format
# (specific code depends on LAMMPS trajectory format)`,
            },
        ]);
    }

    // Check for NetCDF files (common in MD simulations)
    if (lower_filename.endsWith(`.nc`) || lower_filename.endsWith(`.netcdf`)) {
        const xyz_name = filename.replace(/\.(nc|netcdf)$/i, `.xyz`);
        return create_format_error(`NetCDF Trajectory`, filename, [
            {
                tool: `MDAnalysis`,
                code: `import MDAnalysis as mda
# Convert NetCDF to XYZ format
u = mda.Universe('topology.pdb', '${filename}')
u.atoms.write('${xyz_name}', frames='all')`,
            },
        ]);
    }

    // Check for DCD files (CHARMM/NAMD trajectories) # codespell:ignore
    if (lower_filename.endsWith(`.dcd`)) {
        const xyz_name = filename.replace(/\.dcd$/i, `.xyz`);
        return create_format_error(`DCD Trajectory`, filename, [
            {
                tool: `MDAnalysis`,
                code: `import MDAnalysis as mda
# You'll need a topology file (PSF, PDB, etc.)
u = mda.Universe('topology.psf', '${filename}')
u.atoms.write('${xyz_name}', frames='all')`,
            },
        ]);
    }

    // Check if content looks like binary data
    if (content.length > 0 && is_binary(content)) {
        return `
      <div class="unsupported-format">
        <h4>🚫 Unsupported Format: Binary File</h4>
        <p>The file <code>${escape_html(filename)}</code> appears to be a binary file and cannot be parsed as text.</p>
        <div class="code-options">
          <h5>💡 Supported Formats:</h5>
          <ul>
            <li>Multi-frame XYZ files (text-based)</li>
            <li>Pymatgen trajectory JSON</li>
            <li>VASP XDATCAR files</li>
            <li>Compressed versions (.gz) of the above</li>
          </ul>
          <p>Please convert your trajectory to one of these text-based formats.</p>
        </div>
      </div>
    `;
    }

    return null;
}
|
|
1074
|
+
/**
 * Simplified format error creation.
 *
 * Builds the "unsupported format" HTML message: a heading naming the format, a
 * sentence mentioning the file, and one `<pre class="language-python">` block per
 * conversion option.
 *
 * All interpolated values are passed through `escape_html`. The code snippets embed
 * the user-supplied file name, so they must be escaped as well before the result is
 * rendered as markup.
 *
 * @param {string} format_name - Human-readable name of the detected format.
 * @param {string} filename - Name of the offending file.
 * @param {{tool: string, code: string}[]} conversions - Conversion options as plain-text code.
 * @returns {string} HTML snippet.
 */
function create_format_error(format_name, filename, conversions) {
    const conversion_html = conversions
        .map(({ tool, code }) => `
      <div>
        <strong>${escape_html(tool)}:</strong>
        <pre class="language-python">${escape_html(code)}</pre>
      </div>`)
        .join(``);
    return `
    <div class="unsupported-format">
      <h4>🚫 Unsupported Format: ${escape_html(format_name)}</h4>
      <p>The file <code>${escape_html(filename)}</code> appears to be a ${escape_html(format_name.toLowerCase())} file, which is not directly supported.</p>
      <h5>💡 Conversion Options:</h5>
      <div class="code-options">
        ${conversion_html}
      </div>
    </div>
  `;
}
|