@uoa-css-lab/duckscatter 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/.github/dependabot.yml +42 -0
  2. package/.github/workflows/ci.yaml +111 -0
  3. package/.github/workflows/release.yml +55 -0
  4. package/.prettierrc +11 -0
  5. package/LICENSE +22 -0
  6. package/README.md +250 -0
  7. package/dist/data/data-layer.d.ts +169 -0
  8. package/dist/data/data-layer.js +402 -0
  9. package/dist/data/index.d.ts +2 -0
  10. package/dist/data/index.js +2 -0
  11. package/dist/data/repository.d.ts +48 -0
  12. package/dist/data/repository.js +109 -0
  13. package/dist/diagnostics.d.ts +27 -0
  14. package/dist/diagnostics.js +71 -0
  15. package/dist/errors.d.ts +22 -0
  16. package/dist/errors.js +58 -0
  17. package/dist/event-emitter.d.ts +62 -0
  18. package/dist/event-emitter.js +82 -0
  19. package/dist/index.d.ts +12 -0
  20. package/dist/index.js +13 -0
  21. package/dist/renderer/gpu-layer.d.ts +204 -0
  22. package/dist/renderer/gpu-layer.js +611 -0
  23. package/dist/renderer/index.d.ts +3 -0
  24. package/dist/renderer/index.js +3 -0
  25. package/dist/renderer/shaders.d.ts +13 -0
  26. package/dist/renderer/shaders.js +216 -0
  27. package/dist/renderer/webgpu-context.d.ts +20 -0
  28. package/dist/renderer/webgpu-context.js +88 -0
  29. package/dist/scatter-plot.d.ts +210 -0
  30. package/dist/scatter-plot.js +450 -0
  31. package/dist/types.d.ts +171 -0
  32. package/dist/types.js +1 -0
  33. package/dist/ui/index.d.ts +1 -0
  34. package/dist/ui/index.js +1 -0
  35. package/dist/ui/label-layer.d.ts +176 -0
  36. package/dist/ui/label-layer.js +488 -0
  37. package/docs/image.png +0 -0
  38. package/eslint.config.js +72 -0
  39. package/examples/next/README.md +36 -0
  40. package/examples/next/app/components/ColorExpressionInput.tsx +41 -0
  41. package/examples/next/app/components/ControlPanel.tsx +30 -0
  42. package/examples/next/app/components/HoverControlPanel.tsx +69 -0
  43. package/examples/next/app/components/HoverInfoDisplay.tsx +40 -0
  44. package/examples/next/app/components/LabelFilterInput.tsx +46 -0
  45. package/examples/next/app/components/LabelList.tsx +106 -0
  46. package/examples/next/app/components/PointAlphaSlider.tsx +21 -0
  47. package/examples/next/app/components/PointLimitSlider.tsx +23 -0
  48. package/examples/next/app/components/PointList.tsx +105 -0
  49. package/examples/next/app/components/PointSizeScaleSlider.tsx +22 -0
  50. package/examples/next/app/components/ScatterPlotCanvas.tsx +150 -0
  51. package/examples/next/app/components/SearchBox.tsx +46 -0
  52. package/examples/next/app/components/Slider.tsx +76 -0
  53. package/examples/next/app/components/StatsDisplay.tsx +15 -0
  54. package/examples/next/app/components/TimeFilterSlider.tsx +169 -0
  55. package/examples/next/app/context/ScatterPlotContext.tsx +402 -0
  56. package/examples/next/app/favicon.ico +0 -0
  57. package/examples/next/app/globals.css +23 -0
  58. package/examples/next/app/layout.tsx +35 -0
  59. package/examples/next/app/page.tsx +15 -0
  60. package/examples/next/eslint.config.mjs +18 -0
  61. package/examples/next/next.config.ts +7 -0
  62. package/examples/next/package-lock.json +6572 -0
  63. package/examples/next/package.json +27 -0
  64. package/examples/next/postcss.config.mjs +7 -0
  65. package/examples/next/scripts/generate_labels.py +167 -0
  66. package/examples/next/tsconfig.json +34 -0
  67. package/package.json +43 -0
  68. package/src/data/data-layer.ts +515 -0
  69. package/src/data/index.ts +2 -0
  70. package/src/data/repository.ts +146 -0
  71. package/src/diagnostics.ts +108 -0
  72. package/src/errors.ts +69 -0
  73. package/src/event-emitter.ts +88 -0
  74. package/src/index.ts +40 -0
  75. package/src/renderer/gpu-layer.ts +757 -0
  76. package/src/renderer/index.ts +3 -0
  77. package/src/renderer/shaders.ts +219 -0
  78. package/src/renderer/webgpu-context.ts +98 -0
  79. package/src/scatter-plot.ts +533 -0
  80. package/src/types.ts +218 -0
  81. package/src/ui/index.ts +1 -0
  82. package/src/ui/label-layer.ts +648 -0
  83. package/tsconfig.json +19 -0
@@ -0,0 +1,27 @@
1
+ {
2
+ "name": "next",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "next dev",
7
+ "build": "next build",
8
+ "start": "next start",
9
+ "lint": "eslint"
10
+ },
11
+ "dependencies": {
12
+ "next": "^16.1.1",
13
+ "react": "19.2.3",
14
+ "react-dom": "19.2.3",
15
+ "@uoa-css-lab/duckscatter": "file:../../"
16
+ },
17
+ "devDependencies": {
18
+ "@tailwindcss/postcss": "^4",
19
+ "@types/node": "^25",
20
+ "@types/react": "^19",
21
+ "@types/react-dom": "^19",
22
+ "eslint": "^9",
23
+ "eslint-config-next": "16.1.1",
24
+ "tailwindcss": "^4",
25
+ "typescript": "^5"
26
+ }
27
+ }
@@ -0,0 +1,7 @@
1
// PostCSS configuration for the Next.js example app.
// The only plugin enabled is Tailwind CSS's PostCSS integration, with default options.
const plugins = {
  "@tailwindcss/postcss": {},
};

const config = { plugins };

export default config;
@@ -0,0 +1,167 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Generate label.geojson from output.parquet using DBSCAN clustering and OpenAI API.
4
+
5
+ Usage:
6
+ cd examples/next
7
+ export OPENAI_API_KEY="your-api-key"
8
+ python scripts/generate_labels.py
9
+
10
+ Output:
11
+ public/label.geojson
12
+ """
13
+
14
+ import json
15
+ import os
16
+ from pathlib import Path
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ from openai import OpenAI
21
+ from sklearn.cluster import DBSCAN
22
+ from sklearn.preprocessing import StandardScaler
23
+
24
+
25
def find_optimal_eps(
    X: np.ndarray,
    target_clusters: int = 40,
    *,
    min_samples: int = 5,
    eps_min: float = 0.01,
    eps_max: float = 2.0,
    max_iter: int = 20,
) -> float:
    """
    Find a DBSCAN ``eps`` that produces approximately ``target_clusters`` clusters.

    The input is standardized with ``StandardScaler`` and the interval
    ``[eps_min, eps_max]`` is bisected up to ``max_iter`` times. The eps whose
    cluster count was closest to the target is returned; the search stops early
    on an exact match.

    Args:
        X: Point coordinates, one row per sample.
        target_clusters: Desired number of clusters (noise label -1 is not counted).
        min_samples: ``min_samples`` passed to DBSCAN on every trial.
        eps_min: Lower bound of the search interval (must be > 0).
        eps_max: Upper bound of the search interval (must be > eps_min).
        max_iter: Maximum number of bisection steps (must be >= 1).

    Returns:
        The eps value, in standardized units, with the closest cluster count.

    Raises:
        ValueError: If the eps bounds or ``max_iter`` are invalid.

    Note:
        The bisection treats "too few clusters" as "eps is too large". A very
        small eps can also give few clusters (most points become noise), so the
        result is a best-effort estimate rather than a guaranteed optimum.
    """
    if not 0 < eps_min < eps_max:
        raise ValueError("eps bounds must satisfy 0 < eps_min < eps_max")
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    X_scaled = StandardScaler().fit_transform(X)

    # Placeholder only; the first iteration always replaces it (diff < inf).
    best_eps = (eps_min + eps_max) / 2
    best_diff = float('inf')

    for _ in range(max_iter):
        eps = (eps_min + eps_max) / 2
        labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X_scaled)
        # Count distinct labels, ignoring the noise label -1.
        n_clusters = int(np.count_nonzero(np.unique(labels) != -1))

        diff = abs(n_clusters - target_clusters)
        if diff < best_diff:
            best_diff = diff
            best_eps = eps

        if n_clusters < target_clusters:
            # Too few clusters: assume neighbourhoods are too large, so shrink eps.
            eps_max = eps
        elif n_clusters > target_clusters:
            # Too many clusters: grow eps so neighbouring groups merge.
            eps_min = eps
        else:
            break

    return best_eps
56
+
57
+
58
def generate_label_with_llm(
    client: "OpenAI",
    tokens: list[str],
    model: str = "gpt-4o-mini",
    sample_size: int = 50,
) -> str:
    """
    Generate a short summary label for a cluster using the OpenAI chat API.

    Args:
        client: OpenAI client used to issue the chat completion request.
        tokens: Words belonging to the cluster. ``None`` and NaN entries are
            skipped and remaining entries are converted to ``str``.
        model: Chat model name passed to the API.
        sample_size: Maximum number of tokens included in the prompt.

    Returns:
        The label text with surrounding whitespace removed, or an empty string
        when the response carries no text content.
    """
    # Missing values (None / float NaN) would make ", ".join raise TypeError;
    # ``t != t`` is true only for NaN.
    usable = [
        str(t)
        for t in tokens
        if t is not None and not (isinstance(t, float) and t != t)
    ]
    tokens_str = ", ".join(usable[:sample_size])

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that creates concise labels for word clusters. "
                "Given a list of related words, output a single short label (1-3 words) that "
                "best summarizes or categorizes the group. Output only the label, nothing else."
            },
            {
                "role": "user",
                "content": f"Create a short label for this group of words:\n{tokens_str}"
            }
        ],
        max_tokens=20,
        temperature=0.3
    )

    # message.content may be None; avoid AttributeError on .strip().
    content = response.choices[0].message.content
    return (content or "").strip()
84
+
85
+
86
def main():
    """
    Cluster the points in public/output.parquet and write public/label.geojson.

    Steps:
      1. Read the parquet file and take its ``x`` / ``y`` columns.
      2. Pick a DBSCAN eps aiming for ~40 clusters and cluster the standardized points.
      3. For each cluster, compute the centroid (in original coordinates) and a label:
         an LLM-generated label when a ``token`` column exists, else ``"Cluster <id>"``.
      4. Write the clusters as a GeoJSON FeatureCollection, largest cluster first.

    Raises:
        ValueError: If the OPENAI_API_KEY environment variable is not set.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OPENAI_API_KEY environment variable is required")

    client = OpenAI(api_key=api_key)

    script_dir = Path(__file__).parent
    parquet_path = script_dir.parent / "public" / "output.parquet"
    output_path = script_dir.parent / "public" / "label.geojson"

    print(f"Reading parquet from: {parquet_path}")
    df = pd.read_parquet(parquet_path)
    print(f"Loaded {len(df)} rows")
    print(f"Columns: {list(df.columns)}")

    X = df[['x', 'y']].values

    # find_optimal_eps standardizes X internally, so the final clustering must
    # run on standardized data too for the chosen eps to be meaningful.
    X_scaled = StandardScaler().fit_transform(X)

    print("Finding optimal eps for ~40 clusters...")
    optimal_eps = find_optimal_eps(X, target_clusters=40)
    print(f"Using eps={optimal_eps:.4f}")

    df['cluster'] = DBSCAN(eps=optimal_eps, min_samples=5).fit_predict(X_scaled)

    # DBSCAN marks noise points with the label -1.
    clustered = df[df['cluster'] != -1]
    n_clusters = clustered['cluster'].nunique()
    n_noise = len(df) - len(clustered)
    print(f"Found {n_clusters} clusters, {n_noise} noise points")

    has_tokens = 'token' in clustered.columns
    features = []

    # groupby iterates clusters in ascending id order (sort=True by default).
    for i, (cluster_id, cluster_data) in enumerate(clustered.groupby('cluster'), start=1):
        centroid_x = cluster_data['x'].mean()
        centroid_y = cluster_data['y'].mean()
        count = len(cluster_data)

        label = ""
        if has_tokens:
            # Drop missing tokens and force str so prompt building cannot fail.
            tokens = cluster_data['token'].dropna().astype(str).tolist()
            if tokens:
                print(f"[{i}/{n_clusters}] Generating label for cluster {cluster_id} ({count} tokens)...")
                label = generate_label_with_llm(client, tokens)

        # Fall back to a generic name when there is no token column, no usable
        # tokens, or the model returned an empty label.
        if not label:
            label = f"Cluster {cluster_id}"

        print(f" -> {label}")

        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [float(centroid_x), float(centroid_y)]
            },
            "properties": {
                "cluster_label": label,
                "cluster": int(cluster_id),
                "count": int(count)
            }
        })

    features.sort(key=lambda f: f['properties']['count'], reverse=True)

    geojson = {
        "type": "FeatureCollection",
        "features": features
    }

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, ensure_ascii=False, indent=2)

    print(f"Saved {len(features)} labels to: {output_path}")
164
+
165
+
166
+ if __name__ == "__main__":
167
+ main()
@@ -0,0 +1,34 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2017",
4
+ "lib": ["dom", "dom.iterable", "esnext"],
5
+ "allowJs": true,
6
+ "skipLibCheck": true,
7
+ "strict": true,
8
+ "noEmit": true,
9
+ "esModuleInterop": true,
10
+ "module": "esnext",
11
+ "moduleResolution": "bundler",
12
+ "resolveJsonModule": true,
13
+ "isolatedModules": true,
14
+ "jsx": "react-jsx",
15
+ "incremental": true,
16
+ "plugins": [
17
+ {
18
+ "name": "next"
19
+ }
20
+ ],
21
+ "paths": {
22
+ "@/*": ["./*"]
23
+ }
24
+ },
25
+ "include": [
26
+ "next-env.d.ts",
27
+ "**/*.ts",
28
+ "**/*.tsx",
29
+ ".next/types/**/*.ts",
30
+ ".next/dev/types/**/*.ts",
31
+ "**/*.mts"
32
+ ],
33
+ "exclude": ["node_modules"]
34
+ }
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "@uoa-css-lab/duckscatter",
3
+ "version": "1.3.0",
4
+ "description": "A TypeScript library for plotting scatter charts using WebGPU",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "scripts": {
8
+ "build": "tsc",
9
+ "dev": "tsc --watch",
10
+ "serve": "npx http-server . -p 8080",
11
+ "test": "true",
12
+ "lint": "eslint . --ext .ts,.tsx",
13
+ "lint:fix": "eslint . --ext .ts,.tsx --fix",
14
+ "lint:examples": "npm run lint --prefix examples/next",
15
+ "lint:all": "npm run lint && npm run lint:examples",
16
+ "format": "prettier --write \"**/*.{ts,tsx,json,md}\"",
17
+ "format:check": "prettier --check \"**/*.{ts,tsx,json,md}\""
18
+ },
19
+ "keywords": [
20
+ "scatter",
21
+ "plot",
22
+ "chart",
23
+ "webgpu",
24
+ "visualization",
25
+ "typescript"
26
+ ],
27
+ "author": "",
28
+ "license": "MIT",
29
+ "devDependencies": {
30
+ "@duckdb/duckdb-wasm": "^1.30.0",
31
+ "@eslint/js": "^9.39.1",
32
+ "@typescript-eslint/eslint-plugin": "^8.48.0",
33
+ "@typescript-eslint/parser": "^8.48.0",
34
+ "@webgpu/types": "^0.1.40",
35
+ "eslint": "^9.39.1",
36
+ "eslint-config-prettier": "^10.1.8",
37
+ "eslint-plugin-prettier": "^5.5.4",
38
+ "prettier": "^3.6.2",
39
+ "typescript": "^5.3.3",
40
+ "typescript-eslint": "^8.48.0"
41
+ },
42
+ "dependencies": {}
43
+ }