@uoa-css-lab/duckscatter 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +42 -0
- package/.github/workflows/ci.yaml +111 -0
- package/.github/workflows/release.yml +55 -0
- package/.prettierrc +11 -0
- package/LICENSE +22 -0
- package/README.md +250 -0
- package/dist/data/data-layer.d.ts +169 -0
- package/dist/data/data-layer.js +402 -0
- package/dist/data/index.d.ts +2 -0
- package/dist/data/index.js +2 -0
- package/dist/data/repository.d.ts +48 -0
- package/dist/data/repository.js +109 -0
- package/dist/diagnostics.d.ts +27 -0
- package/dist/diagnostics.js +71 -0
- package/dist/errors.d.ts +22 -0
- package/dist/errors.js +58 -0
- package/dist/event-emitter.d.ts +62 -0
- package/dist/event-emitter.js +82 -0
- package/dist/index.d.ts +12 -0
- package/dist/index.js +13 -0
- package/dist/renderer/gpu-layer.d.ts +204 -0
- package/dist/renderer/gpu-layer.js +611 -0
- package/dist/renderer/index.d.ts +3 -0
- package/dist/renderer/index.js +3 -0
- package/dist/renderer/shaders.d.ts +13 -0
- package/dist/renderer/shaders.js +216 -0
- package/dist/renderer/webgpu-context.d.ts +20 -0
- package/dist/renderer/webgpu-context.js +88 -0
- package/dist/scatter-plot.d.ts +210 -0
- package/dist/scatter-plot.js +450 -0
- package/dist/types.d.ts +171 -0
- package/dist/types.js +1 -0
- package/dist/ui/index.d.ts +1 -0
- package/dist/ui/index.js +1 -0
- package/dist/ui/label-layer.d.ts +176 -0
- package/dist/ui/label-layer.js +488 -0
- package/docs/image.png +0 -0
- package/eslint.config.js +72 -0
- package/examples/next/README.md +36 -0
- package/examples/next/app/components/ColorExpressionInput.tsx +41 -0
- package/examples/next/app/components/ControlPanel.tsx +30 -0
- package/examples/next/app/components/HoverControlPanel.tsx +69 -0
- package/examples/next/app/components/HoverInfoDisplay.tsx +40 -0
- package/examples/next/app/components/LabelFilterInput.tsx +46 -0
- package/examples/next/app/components/LabelList.tsx +106 -0
- package/examples/next/app/components/PointAlphaSlider.tsx +21 -0
- package/examples/next/app/components/PointLimitSlider.tsx +23 -0
- package/examples/next/app/components/PointList.tsx +105 -0
- package/examples/next/app/components/PointSizeScaleSlider.tsx +22 -0
- package/examples/next/app/components/ScatterPlotCanvas.tsx +150 -0
- package/examples/next/app/components/SearchBox.tsx +46 -0
- package/examples/next/app/components/Slider.tsx +76 -0
- package/examples/next/app/components/StatsDisplay.tsx +15 -0
- package/examples/next/app/components/TimeFilterSlider.tsx +169 -0
- package/examples/next/app/context/ScatterPlotContext.tsx +402 -0
- package/examples/next/app/favicon.ico +0 -0
- package/examples/next/app/globals.css +23 -0
- package/examples/next/app/layout.tsx +35 -0
- package/examples/next/app/page.tsx +15 -0
- package/examples/next/eslint.config.mjs +18 -0
- package/examples/next/next.config.ts +7 -0
- package/examples/next/package-lock.json +6572 -0
- package/examples/next/package.json +27 -0
- package/examples/next/postcss.config.mjs +7 -0
- package/examples/next/scripts/generate_labels.py +167 -0
- package/examples/next/tsconfig.json +34 -0
- package/package.json +43 -0
- package/src/data/data-layer.ts +515 -0
- package/src/data/index.ts +2 -0
- package/src/data/repository.ts +146 -0
- package/src/diagnostics.ts +108 -0
- package/src/errors.ts +69 -0
- package/src/event-emitter.ts +88 -0
- package/src/index.ts +40 -0
- package/src/renderer/gpu-layer.ts +757 -0
- package/src/renderer/index.ts +3 -0
- package/src/renderer/shaders.ts +219 -0
- package/src/renderer/webgpu-context.ts +98 -0
- package/src/scatter-plot.ts +533 -0
- package/src/types.ts +218 -0
- package/src/ui/index.ts +1 -0
- package/src/ui/label-layer.ts +648 -0
- package/tsconfig.json +19 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "next",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"private": true,
|
|
5
|
+
"scripts": {
|
|
6
|
+
"dev": "next dev",
|
|
7
|
+
"build": "next build",
|
|
8
|
+
"start": "next start",
|
|
9
|
+
"lint": "eslint"
|
|
10
|
+
},
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"next": "^16.1.1",
|
|
13
|
+
"react": "19.2.3",
|
|
14
|
+
"react-dom": "19.2.3",
|
|
15
|
+
"@uoa-css-lab/duckscatter": "file:../../"
|
|
16
|
+
},
|
|
17
|
+
"devDependencies": {
|
|
18
|
+
"@tailwindcss/postcss": "^4",
|
|
19
|
+
"@types/node": "^25",
|
|
20
|
+
"@types/react": "^19",
|
|
21
|
+
"@types/react-dom": "^19",
|
|
22
|
+
"eslint": "^9",
|
|
23
|
+
"eslint-config-next": "16.1.1",
|
|
24
|
+
"tailwindcss": "^4",
|
|
25
|
+
"typescript": "^5"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Generate label.geojson from output.parquet using DBSCAN clustering and OpenAI API.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
cd examples/next
|
|
7
|
+
export OPENAI_API_KEY="your-api-key"
|
|
8
|
+
python scripts/generate_labels.py
|
|
9
|
+
|
|
10
|
+
Output:
|
|
11
|
+
public/label.geojson
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import pandas as pd
|
|
20
|
+
from openai import OpenAI
|
|
21
|
+
from sklearn.cluster import DBSCAN
|
|
22
|
+
from sklearn.preprocessing import StandardScaler
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def find_optimal_eps(
    X: np.ndarray,
    target_clusters: int = 40,
    *,
    min_samples: int = 5,
    eps_bounds: tuple[float, float] = (0.01, 2.0),
    max_iter: int = 20,
) -> float:
    """
    Find a DBSCAN eps value that produces approximately target_clusters clusters.

    X is standardized with StandardScaler, then eps is bisected inside
    eps_bounds. Each candidate eps is scored by how far its cluster count
    (noise label -1 excluded) is from target_clusters, and the best-scoring
    candidate seen during the search is returned.

    Args:
        X: Sample matrix passed to StandardScaler / DBSCAN.
        target_clusters: Desired number of clusters.
        min_samples: DBSCAN min_samples used for every trial run. Use the
            same value for the final clustering so the returned eps applies.
        eps_bounds: (lower, upper) search interval for eps, in standardized
            units. Must satisfy 0 < lower < upper.
        max_iter: Maximum number of bisection steps (one DBSCAN fit each).

    Returns:
        The eps whose cluster count was closest to target_clusters. If
        max_iter is less than 1, the midpoint of eps_bounds is returned.

    Raises:
        ValueError: If eps_bounds is not a strictly increasing positive pair.
    """
    eps_min, eps_max = eps_bounds
    if not 0 < eps_min < eps_max:
        raise ValueError("eps_bounds must satisfy 0 < lower < upper")

    X_scaled = StandardScaler().fit_transform(X)

    # Fallback only matters when the loop never runs; the first iteration
    # always replaces it because any finite diff beats infinity.
    best_eps = (eps_min + eps_max) / 2
    best_diff = float('inf')

    for _ in range(max_iter):
        eps = (eps_min + eps_max) / 2
        labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X_scaled)
        # DBSCAN marks noise with -1; it is not a cluster.
        n_clusters = int(np.unique(labels[labels != -1]).size)

        diff = abs(n_clusters - target_clusters)
        if diff < best_diff:
            best_diff = diff
            best_eps = eps

        # The search treats "too few clusters" as "eps too large" (clusters
        # merged) and shrinks eps. NOTE(review): the cluster count is not
        # guaranteed to be monotonic in eps, which is why the best candidate
        # is tracked separately instead of returning the final midpoint.
        if n_clusters < target_clusters:
            eps_max = eps
        elif n_clusters > target_clusters:
            eps_min = eps
        else:
            break

    return best_eps
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def generate_label_with_llm(client: OpenAI, tokens: list[str], model: str = "gpt-4o-mini") -> str:
    """
    Generate a summary label for a cluster using OpenAI API.

    Only the first 50 entries of tokens are sent to the model, joined into a
    comma-separated list.

    Args:
        client: OpenAI client used to call the chat completions endpoint.
        tokens: Words belonging to the cluster. Missing values (None / NaN)
            among the sampled entries are skipped; other non-string values
            are converted with str().
        model: Chat model name to request.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the response carries no text content.
    """
    # `token == token` is False only for NaN, so this drops missing cells
    # that would otherwise make str.join raise TypeError.
    tokens_sample = [
        str(token)
        for token in tokens[:50]
        if token is not None and token == token
    ]
    tokens_str = ", ".join(tokens_sample)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that creates concise labels for word clusters. "
                           "Given a list of related words, output a single short label (1-3 words) that "
                           "best summarizes or categorizes the group. Output only the label, nothing else."
            },
            {
                "role": "user",
                "content": f"Create a short label for this group of words:\n{tokens_str}"
            }
        ],
        max_tokens=20,
        temperature=0.3
    )

    # message.content is optional in the API response (e.g. on a refusal),
    # so guard before calling .strip().
    content = response.choices[0].message.content
    return content.strip() if content else ""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def main():
    """
    Cluster the points in public/output.parquet and write public/label.geojson.

    Steps: read the parquet file, run DBSCAN on the standardized x/y columns
    with an eps chosen by find_optimal_eps, label every cluster (via the
    OpenAI API when a 'token' column exists, otherwise "Cluster <id>"), and
    save one GeoJSON point feature per cluster at the cluster centroid,
    ordered by descending point count.

    Raises:
        ValueError: If the OPENAI_API_KEY environment variable is not set.
    """
    key = os.environ.get("OPENAI_API_KEY")
    if not key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    client = OpenAI(api_key=key)

    # Input and output both live in the "public" directory next to scripts/.
    public_dir = Path(__file__).parent.parent / "public"
    parquet_path = public_dir / "output.parquet"
    output_path = public_dir / "label.geojson"

    print(f"Reading parquet from: {parquet_path}")
    df = pd.read_parquet(parquet_path)
    print(f"Loaded {len(df)} rows")
    print(f"Columns: {list(df.columns)}")

    X = df[['x', 'y']].values
    X_scaled = StandardScaler().fit_transform(X)

    print("Finding optimal eps for ~40 clusters...")
    optimal_eps = find_optimal_eps(X, target_clusters=40)
    print(f"Using eps={optimal_eps:.4f}")

    df['cluster'] = DBSCAN(eps=optimal_eps, min_samples=5).fit_predict(X_scaled)

    # Label -1 is DBSCAN noise; everything else is a real cluster.
    is_noise = df['cluster'] == -1
    clustered = df[~is_noise]
    cluster_ids = sorted(clustered['cluster'].unique())
    total = len(cluster_ids)
    print(f"Found {total} clusters, {is_noise.sum()} noise points")

    # The column set is the same for every cluster subset, so check it once.
    has_tokens = 'token' in clustered.columns

    features = []
    for position, cluster_id in enumerate(cluster_ids, start=1):
        members = clustered[clustered['cluster'] == cluster_id]
        size = len(members)
        center = [float(members['x'].mean()), float(members['y'].mean())]

        if has_tokens:
            print(f"[{position}/{total}] Generating label for cluster {cluster_id} ({size} tokens)...")
            label = generate_label_with_llm(client, members['token'].tolist())
        else:
            label = f"Cluster {cluster_id}"

        print(f" -> {label}")

        properties = {
            "cluster_label": label,
            "cluster": int(cluster_id),
            "count": int(size)
        }
        geometry = {"type": "Point", "coordinates": center}
        features.append({"type": "Feature", "geometry": geometry, "properties": properties})

    # Largest clusters first.
    features = sorted(features, key=lambda feat: feat['properties']['count'], reverse=True)

    collection = {"type": "FeatureCollection", "features": features}
    with output_path.open('w', encoding='utf-8') as out:
        json.dump(collection, out, ensure_ascii=False, indent=2)

    print(f"Saved {len(features)} labels to: {output_path}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2017",
|
|
4
|
+
"lib": ["dom", "dom.iterable", "esnext"],
|
|
5
|
+
"allowJs": true,
|
|
6
|
+
"skipLibCheck": true,
|
|
7
|
+
"strict": true,
|
|
8
|
+
"noEmit": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"module": "esnext",
|
|
11
|
+
"moduleResolution": "bundler",
|
|
12
|
+
"resolveJsonModule": true,
|
|
13
|
+
"isolatedModules": true,
|
|
14
|
+
"jsx": "react-jsx",
|
|
15
|
+
"incremental": true,
|
|
16
|
+
"plugins": [
|
|
17
|
+
{
|
|
18
|
+
"name": "next"
|
|
19
|
+
}
|
|
20
|
+
],
|
|
21
|
+
"paths": {
|
|
22
|
+
"@/*": ["./*"]
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"include": [
|
|
26
|
+
"next-env.d.ts",
|
|
27
|
+
"**/*.ts",
|
|
28
|
+
"**/*.tsx",
|
|
29
|
+
".next/types/**/*.ts",
|
|
30
|
+
".next/dev/types/**/*.ts",
|
|
31
|
+
"**/*.mts"
|
|
32
|
+
],
|
|
33
|
+
"exclude": ["node_modules"]
|
|
34
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@uoa-css-lab/duckscatter",
|
|
3
|
+
"version": "1.3.0",
|
|
4
|
+
"description": "A TypeScript library for plotting scatter charts using WebGPU",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"build": "tsc",
|
|
9
|
+
"dev": "tsc --watch",
|
|
10
|
+
"serve": "npx http-server . -p 8080",
|
|
11
|
+
"test": "true",
|
|
12
|
+
"lint": "eslint . --ext .ts,.tsx",
|
|
13
|
+
"lint:fix": "eslint . --ext .ts,.tsx --fix",
|
|
14
|
+
"lint:examples": "npm run lint --prefix examples/next",
|
|
15
|
+
"lint:all": "npm run lint && npm run lint:examples",
|
|
16
|
+
"format": "prettier --write \"**/*.{ts,tsx,json,md}\"",
|
|
17
|
+
"format:check": "prettier --check \"**/*.{ts,tsx,json,md}\""
|
|
18
|
+
},
|
|
19
|
+
"keywords": [
|
|
20
|
+
"scatter",
|
|
21
|
+
"plot",
|
|
22
|
+
"chart",
|
|
23
|
+
"webgpu",
|
|
24
|
+
"visualization",
|
|
25
|
+
"typescript"
|
|
26
|
+
],
|
|
27
|
+
"author": "",
|
|
28
|
+
"license": "MIT",
|
|
29
|
+
"devDependencies": {
|
|
30
|
+
"@duckdb/duckdb-wasm": "^1.30.0",
|
|
31
|
+
"@eslint/js": "^9.39.1",
|
|
32
|
+
"@typescript-eslint/eslint-plugin": "^8.48.0",
|
|
33
|
+
"@typescript-eslint/parser": "^8.48.0",
|
|
34
|
+
"@webgpu/types": "^0.1.40",
|
|
35
|
+
"eslint": "^9.39.1",
|
|
36
|
+
"eslint-config-prettier": "^10.1.8",
|
|
37
|
+
"eslint-plugin-prettier": "^5.5.4",
|
|
38
|
+
"prettier": "^3.6.2",
|
|
39
|
+
"typescript": "^5.3.3",
|
|
40
|
+
"typescript-eslint": "^8.48.0"
|
|
41
|
+
},
|
|
42
|
+
"dependencies": {}
|
|
43
|
+
}
|