lightning-core 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lightning_core-0.1.2/.github/workflows/python-wheel-publish.yml +97 -0
- lightning_core-0.1.2/.gitignore +17 -0
- lightning_core-0.1.2/CMakeLists.txt +173 -0
- lightning_core-0.1.2/PKG-INFO +125 -0
- lightning_core-0.1.2/README.md +98 -0
- lightning_core-0.1.2/ROADMAP.md +58 -0
- lightning_core-0.1.2/benchmarks/CMakeLists.txt +28 -0
- lightning_core-0.1.2/benchmarks/bench_attention.cpp +424 -0
- lightning_core-0.1.2/benchmarks/bench_cnn_dnn.cpp +296 -0
- lightning_core-0.1.2/benchmarks/bench_lstm_rnn.cpp +306 -0
- lightning_core-0.1.2/benchmarks/bench_matmul.cpp +107 -0
- lightning_core-0.1.2/benchmarks/bench_matrix_ops.cpp +208 -0
- lightning_core-0.1.2/benchmarks/bench_transformer.cpp +279 -0
- lightning_core-0.1.2/benchmarks/bench_vector_add.cpp +311 -0
- lightning_core-0.1.2/benchmarks/bench_vlm.cpp +334 -0
- lightning_core-0.1.2/benchmarks/generate_model_profile_env.sh +23 -0
- lightning_core-0.1.2/benchmarks/sweep_matrix_ops.sh +63 -0
- lightning_core-0.1.2/docs/advanced.md +178 -0
- lightning_core-0.1.2/docs/contributor.md +67 -0
- lightning_core-0.1.2/docs/index.md +54 -0
- lightning_core-0.1.2/docs/quickstart.md +72 -0
- lightning_core-0.1.2/examples/CMakeLists.txt +5 -0
- lightning_core-0.1.2/examples/lightning_core_c_api.c +49 -0
- lightning_core-0.1.2/include/cudajun/apple_ml.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/attention.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/detail/attention_backend.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/detail/ops_backend.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/model_customization.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/models/dnn_cnn_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/models/graph_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/models/lstm_rnn_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/models/transformer_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/models/vlm_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/namespace_alias.hpp +6 -0
- lightning_core-0.1.2/include/cudajun/ops/policy.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/ops.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/runtime.hpp +4 -0
- lightning_core-0.1.2/include/cudajun/tensor.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/attention.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/lightining_core.h +53 -0
- lightning_core-0.1.2/include/lightining_core/lightining_core.hpp +12 -0
- lightning_core-0.1.2/include/lightining_core/model_customization.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/models/dnn_cnn_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/models/graph_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/models/lstm_rnn_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/models/transformer_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/models/vlm_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/namespace_alias.hpp +3 -0
- lightning_core-0.1.2/include/lightining_core/ops/policy.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/ops.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/runtime.hpp +4 -0
- lightning_core-0.1.2/include/lightining_core/tensor.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/apple_ml.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/attention.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/core/apple_ml.hpp +19 -0
- lightning_core-0.1.2/include/lightning_core/core/attention.hpp +131 -0
- lightning_core-0.1.2/include/lightning_core/core/detail/attention_backend.hpp +60 -0
- lightning_core-0.1.2/include/lightning_core/core/detail/ops_backend.hpp +578 -0
- lightning_core-0.1.2/include/lightning_core/core/model_customization.hpp +187 -0
- lightning_core-0.1.2/include/lightning_core/core/models/dnn_cnn_fastpath.hpp +84 -0
- lightning_core-0.1.2/include/lightning_core/core/models/graph_fastpath.hpp +233 -0
- lightning_core-0.1.2/include/lightning_core/core/models/lstm_rnn_fastpath.hpp +98 -0
- lightning_core-0.1.2/include/lightning_core/core/models/transformer_fastpath.hpp +98 -0
- lightning_core-0.1.2/include/lightning_core/core/models/vlm_fastpath.hpp +202 -0
- lightning_core-0.1.2/include/lightning_core/core/ops/policy.hpp +144 -0
- lightning_core-0.1.2/include/lightning_core/core/ops.hpp +635 -0
- lightning_core-0.1.2/include/lightning_core/core/runtime.hpp +90 -0
- lightning_core-0.1.2/include/lightning_core/core/tensor.hpp +433 -0
- lightning_core-0.1.2/include/lightning_core/lightning_core.h +53 -0
- lightning_core-0.1.2/include/lightning_core/lightning_core.hpp +12 -0
- lightning_core-0.1.2/include/lightning_core/model_customization.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/models/dnn_cnn_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/models/graph_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/models/lstm_rnn_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/models/transformer_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/models/vlm_fastpath.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/namespace_alias.hpp +6 -0
- lightning_core-0.1.2/include/lightning_core/ops/policy.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/ops.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/runtime.hpp +4 -0
- lightning_core-0.1.2/include/lightning_core/tensor.hpp +4 -0
- lightning_core-0.1.2/pyproject.toml +58 -0
- lightning_core-0.1.2/python/CMakeLists.txt +80 -0
- lightning_core-0.1.2/python/bindings/bind_attention.cpp +192 -0
- lightning_core-0.1.2/python/bindings/bind_common.hpp +79 -0
- lightning_core-0.1.2/python/bindings/bind_ops.cpp +374 -0
- lightning_core-0.1.2/python/bindings/bind_runtime.cpp +9 -0
- lightning_core-0.1.2/python/bindings/bind_tensor.cpp +108 -0
- lightning_core-0.1.2/python/bindings/pybind_module.cpp +10 -0
- lightning_core-0.1.2/scripts/sync_remote_after_repo_rename.sh +110 -0
- lightning_core-0.1.2/src/apple_ml.mm +303 -0
- lightning_core-0.1.2/src/apple_ml_stub.cpp +33 -0
- lightning_core-0.1.2/src/attention.cpp +287 -0
- lightning_core-0.1.2/src/attention_cpu.cpp +165 -0
- lightning_core-0.1.2/src/attention_metal.mm +2214 -0
- lightning_core-0.1.2/src/lightning_core_c_api.cpp +148 -0
- lightning_core-0.1.2/src/ops/matmul_cpu.cpp +238 -0
- lightning_core-0.1.2/src/ops/matmul_metal.mm +1234 -0
- lightning_core-0.1.2/src/ops/matrix_elemwise.cpp +342 -0
- lightning_core-0.1.2/src/ops/matrix_elemwise_metal.mm +738 -0
- lightning_core-0.1.2/src/ops/vector_add.cpp +3 -0
- lightning_core-0.1.2/src/ops/vector_add_cpu.cpp +182 -0
- lightning_core-0.1.2/src/ops/vector_add_cuda.cu +88 -0
- lightning_core-0.1.2/src/ops/vector_add_metal.mm +414 -0
- lightning_core-0.1.2/src/ops/vector_add_metal_stub.cpp +89 -0
- lightning_core-0.1.2/src/ops/vector_add_stub.cpp +31 -0
- lightning_core-0.1.2/src/runtime.cpp +353 -0
- lightning_core-0.1.2/src/tensor.cpp +3 -0
- lightning_core-0.1.2/tests/CMakeLists.txt +34 -0
- lightning_core-0.1.2/tests/test_attention.cpp +224 -0
- lightning_core-0.1.2/tests/test_matmul.cpp +159 -0
- lightning_core-0.1.2/tests/test_matrix_ops.cpp +265 -0
- lightning_core-0.1.2/tests/test_model_customization.cpp +155 -0
- lightning_core-0.1.2/tests/test_runtime.cpp +40 -0
- lightning_core-0.1.2/tests/test_tensor.cpp +171 -0
- lightning_core-0.1.2/tests/test_vlm_fastpath.cpp +70 -0
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
name: Build and Publish Python Wheel
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
branches: [ main, master ]
|
|
6
|
+
push:
|
|
7
|
+
branches: [ main, master ]
|
|
8
|
+
tags:
|
|
9
|
+
- "v*"
|
|
10
|
+
workflow_dispatch:
|
|
11
|
+
inputs:
|
|
12
|
+
publish_target:
|
|
13
|
+
description: "Publish target for manual run"
|
|
14
|
+
required: true
|
|
15
|
+
default: "none"
|
|
16
|
+
type: choice
|
|
17
|
+
options:
|
|
18
|
+
- none
|
|
19
|
+
- testpypi
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
build-macos-wheel:
|
|
23
|
+
name: Build macOS wheel
|
|
24
|
+
runs-on: macos-14
|
|
25
|
+
permissions:
|
|
26
|
+
contents: read
|
|
27
|
+
|
|
28
|
+
steps:
|
|
29
|
+
- name: Checkout
|
|
30
|
+
uses: actions/checkout@v4
|
|
31
|
+
|
|
32
|
+
- name: Setup Python
|
|
33
|
+
uses: actions/setup-python@v5
|
|
34
|
+
with:
|
|
35
|
+
python-version: "3.12"
|
|
36
|
+
|
|
37
|
+
- name: Build wheel and sdist
|
|
38
|
+
run: |
|
|
39
|
+
python -m pip install --upgrade pip
|
|
40
|
+
python -m pip install build twine
|
|
41
|
+
python -m build
|
|
42
|
+
|
|
43
|
+
- name: Validate distributions
|
|
44
|
+
run: python -m twine check dist/*
|
|
45
|
+
|
|
46
|
+
- name: Upload dist artifacts
|
|
47
|
+
uses: actions/upload-artifact@v4
|
|
48
|
+
with:
|
|
49
|
+
name: python-dist
|
|
50
|
+
path: dist/*
|
|
51
|
+
|
|
52
|
+
publish-pypi:
|
|
53
|
+
name: Publish to PyPI
|
|
54
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
55
|
+
needs: [build-macos-wheel]
|
|
56
|
+
runs-on: ubuntu-latest
|
|
57
|
+
environment:
|
|
58
|
+
name: pypi
|
|
59
|
+
permissions:
|
|
60
|
+
contents: read
|
|
61
|
+
id-token: write
|
|
62
|
+
|
|
63
|
+
steps:
|
|
64
|
+
- name: Download dist artifacts
|
|
65
|
+
uses: actions/download-artifact@v4
|
|
66
|
+
with:
|
|
67
|
+
name: python-dist
|
|
68
|
+
path: dist
|
|
69
|
+
|
|
70
|
+
- name: Publish package distributions to PyPI
|
|
71
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
72
|
+
with:
|
|
73
|
+
skip-existing: true
|
|
74
|
+
|
|
75
|
+
publish-testpypi:
|
|
76
|
+
name: Publish to TestPyPI
|
|
77
|
+
if: github.event_name == 'workflow_dispatch' && github.event.inputs.publish_target == 'testpypi'
|
|
78
|
+
needs: [build-macos-wheel]
|
|
79
|
+
runs-on: ubuntu-latest
|
|
80
|
+
environment:
|
|
81
|
+
name: testpypi
|
|
82
|
+
permissions:
|
|
83
|
+
contents: read
|
|
84
|
+
id-token: write
|
|
85
|
+
|
|
86
|
+
steps:
|
|
87
|
+
- name: Download dist artifacts
|
|
88
|
+
uses: actions/download-artifact@v4
|
|
89
|
+
with:
|
|
90
|
+
name: python-dist
|
|
91
|
+
path: dist
|
|
92
|
+
|
|
93
|
+
- name: Publish package distributions to TestPyPI
|
|
94
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
95
|
+
with:
|
|
96
|
+
repository-url: https://test.pypi.org/legacy/
|
|
97
|
+
skip-existing: true
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
cmake_minimum_required(VERSION 3.23)
|
|
2
|
+
project(LightningCore VERSION 0.1.2 LANGUAGES CXX)  # keep in sync with the Python package version (PKG-INFO: 0.1.2)
|
|
3
|
+
|
|
4
|
+
if(NOT APPLE)
|
|
5
|
+
message(FATAL_ERROR "Lightning Core currently supports macOS only.")
|
|
6
|
+
endif()
|
|
7
|
+
|
|
8
|
+
# Pull in CMake's language-detection utility.
|
|
9
|
+
# It is used below to detect CUDA; if CUDA is missing we simply fall back to the CPU path.
|
|
10
|
+
include(CheckLanguage)
|
|
11
|
+
|
|
12
|
+
# These options are switches that select how much of the project gets built.
|
|
13
|
+
set(CJ_ENABLE_CUDA OFF CACHE BOOL "Enable native NVIDIA CUDA runtime path" FORCE)
|
|
14
|
+
set(CJ_ENABLE_METAL ON CACHE BOOL "Enable native Apple Metal runtime path" FORCE)
|
|
15
|
+
if(DEFINED SKBUILD)
|
|
16
|
+
option(CJ_BUILD_TESTS "Build unit tests" OFF)
|
|
17
|
+
option(CJ_BUILD_BENCHMARKS "Build benchmark binaries" OFF)
|
|
18
|
+
option(CJ_BUILD_PYTHON "Build Python bindings (pybind11)" ON)
|
|
19
|
+
option(CJ_BUILD_EXAMPLES "Build C/C++ examples" OFF)
|
|
20
|
+
else()
|
|
21
|
+
option(CJ_BUILD_TESTS "Build unit tests" ON)
|
|
22
|
+
option(CJ_BUILD_BENCHMARKS "Build benchmark binaries" ON)
|
|
23
|
+
option(CJ_BUILD_PYTHON "Build Python bindings (pybind11)" ON)
|
|
24
|
+
option(CJ_BUILD_EXAMPLES "Build C/C++ examples" ON)
|
|
25
|
+
endif()
|
|
26
|
+
|
|
27
|
+
# Standardize on C++17 to reduce compiler behavior differences across platforms.
|
|
28
|
+
set(CMAKE_CXX_STANDARD 17)
|
|
29
|
+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
30
|
+
set(CMAKE_CXX_EXTENSIONS OFF)
|
|
31
|
+
|
|
32
|
+
if(NOT CMAKE_BUILD_TYPE)
|
|
33
|
+
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
|
34
|
+
endif()
|
|
35
|
+
|
|
36
|
+
# Default conservatively to OFF; switch to ON only when detection succeeds.
|
|
37
|
+
set(CJ_HAS_CUDA OFF)
|
|
38
|
+
set(CJ_HAS_METAL OFF)
|
|
39
|
+
if(CJ_ENABLE_CUDA)
|
|
40
|
+
# First check whether a CUDA compiler is available.
|
|
41
|
+
check_language(CUDA)
|
|
42
|
+
if(CMAKE_CUDA_COMPILER)
|
|
43
|
+
# Reaching this branch means .cu files can actually be built.
|
|
44
|
+
enable_language(CUDA)
|
|
45
|
+
set(CJ_HAS_CUDA ON)
|
|
46
|
+
message(STATUS "CUDA compiler detected: ${CMAKE_CUDA_COMPILER}")
|
|
47
|
+
else()
|
|
48
|
+
# This warning is common in environments such as macOS; it is expected, and the build falls back to the CPU path.
|
|
49
|
+
message(WARNING "CUDA compiler not found. Building CPU-only path.")
|
|
50
|
+
endif()
|
|
51
|
+
endif()
|
|
52
|
+
|
|
53
|
+
if(APPLE AND CJ_ENABLE_METAL)
|
|
54
|
+
check_language(OBJCXX)
|
|
55
|
+
if(CMAKE_OBJCXX_COMPILER)
|
|
56
|
+
enable_language(OBJCXX)
|
|
57
|
+
set(CJ_HAS_METAL ON)
|
|
58
|
+
message(STATUS "Metal backend enabled with Objective-C++ compiler: ${CMAKE_OBJCXX_COMPILER}")
|
|
59
|
+
else()
|
|
60
|
+
message(WARNING "Objective-C++ compiler not found. Metal backend disabled.")
|
|
61
|
+
endif()
|
|
62
|
+
endif()
|
|
63
|
+
|
|
64
|
+
# Common sources: runtime, tensor, dispatch, and CPU implementations.
|
|
65
|
+
set(CJ_SOURCES
|
|
66
|
+
src/runtime.cpp
|
|
67
|
+
src/lightning_core_c_api.cpp
|
|
68
|
+
src/attention.cpp
|
|
69
|
+
src/attention_cpu.cpp
|
|
70
|
+
src/ops/matmul_cpu.cpp
|
|
71
|
+
src/ops/matrix_elemwise.cpp
|
|
72
|
+
src/ops/vector_add_cpu.cpp
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
if(APPLE)
|
|
76
|
+
list(APPEND CJ_SOURCES src/apple_ml.mm)
|
|
77
|
+
else()
|
|
78
|
+
list(APPEND CJ_SOURCES src/apple_ml_stub.cpp)
|
|
79
|
+
endif()
|
|
80
|
+
|
|
81
|
+
if(CJ_HAS_CUDA)
|
|
82
|
+
# Include the CUDA kernel file when CUDA is available.
|
|
83
|
+
list(APPEND CJ_SOURCES src/ops/vector_add_cuda.cu)
|
|
84
|
+
else()
|
|
85
|
+
# Without CUDA, include a stub implementation so linking does not fail.
|
|
86
|
+
list(APPEND CJ_SOURCES src/ops/vector_add_stub.cpp)
|
|
87
|
+
endif()
|
|
88
|
+
|
|
89
|
+
if(CJ_HAS_METAL)
|
|
90
|
+
list(APPEND CJ_SOURCES src/attention_metal.mm)
|
|
91
|
+
list(APPEND CJ_SOURCES src/ops/matmul_metal.mm)
|
|
92
|
+
list(APPEND CJ_SOURCES src/ops/matrix_elemwise_metal.mm)
|
|
93
|
+
list(APPEND CJ_SOURCES src/ops/vector_add_metal.mm)
|
|
94
|
+
else()
|
|
95
|
+
list(APPEND CJ_SOURCES src/attention_metal.mm)
|
|
96
|
+
list(APPEND CJ_SOURCES src/ops/matmul_metal.mm)
|
|
97
|
+
list(APPEND CJ_SOURCES src/ops/matrix_elemwise_metal.mm)
|
|
98
|
+
list(APPEND CJ_SOURCES src/ops/vector_add_metal_stub.cpp)
|
|
99
|
+
endif()
|
|
100
|
+
|
|
101
|
+
# Core library target and its namespaced aliases.
|
|
102
|
+
add_library(lightning_core_core STATIC ${CJ_SOURCES})
|
|
103
|
+
add_library(lightning_core::lightning_core ALIAS lightning_core_core)
|
|
104
|
+
add_library(cudajun::cudajun ALIAS lightning_core_core)
|
|
105
|
+
set_target_properties(lightning_core_core PROPERTIES OUTPUT_NAME lightning_core)
|
|
106
|
+
|
|
107
|
+
# Release-only optimization flags. The generator expression is quoted and uses a
# semicolon-separated list so CMake cannot split it into separate arguments.
target_compile_options(lightning_core_core PRIVATE
|
|
108
|
+
"$<$<CONFIG:Release>:-O3;-ffast-math;-funroll-loops>"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
# Export the public header path so external targets can include the headers.
|
|
112
|
+
target_include_directories(lightning_core_core
|
|
113
|
+
PUBLIC
|
|
114
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
if(CJ_HAS_CUDA)
|
|
118
|
+
# Inject the macro so code can branch with #if CJ_HAS_CUDA.
|
|
119
|
+
target_compile_definitions(lightning_core_core PUBLIC CJ_HAS_CUDA=1)
|
|
120
|
+
set_target_properties(lightning_core_core PROPERTIES
|
|
121
|
+
# CUDA separable compilation: kernel and host code can be split across files and still link.
|
|
122
|
+
CUDA_SEPARABLE_COMPILATION ON
|
|
123
|
+
CUDA_STANDARD 17
|
|
124
|
+
CUDA_STANDARD_REQUIRED ON
|
|
125
|
+
)
|
|
126
|
+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
|
127
|
+
# If no architecture is specified, detect it automatically from the local machine.
|
|
128
|
+
set_target_properties(lightning_core_core PROPERTIES CUDA_ARCHITECTURES "native")
|
|
129
|
+
endif()
|
|
130
|
+
# Link the CUDA runtime (cudart).
|
|
131
|
+
target_link_libraries(lightning_core_core PUBLIC CUDA::cudart)
|
|
132
|
+
else()
|
|
133
|
+
# Define the macro as 0 so code paths stay clearly separated in CPU-only builds as well.
|
|
134
|
+
target_compile_definitions(lightning_core_core PUBLIC CJ_HAS_CUDA=0)
|
|
135
|
+
endif()
|
|
136
|
+
|
|
137
|
+
if(CJ_HAS_METAL)
|
|
138
|
+
target_compile_definitions(lightning_core_core PUBLIC CJ_HAS_METAL=1)
|
|
139
|
+
target_link_libraries(lightning_core_core PUBLIC "-framework Metal" "-framework Foundation")
|
|
140
|
+
else()
|
|
141
|
+
target_compile_definitions(lightning_core_core PUBLIC CJ_HAS_METAL=0)
|
|
142
|
+
endif()
|
|
143
|
+
|
|
144
|
+
if(APPLE)
|
|
145
|
+
target_link_libraries(lightning_core_core PUBLIC "-framework CoreML" "-framework MetalPerformanceShaders" "-framework MetalPerformanceShadersGraph")
|
|
146
|
+
endif()
|
|
147
|
+
|
|
148
|
+
# Platform identification macros for per-OS conditional compilation.
|
|
149
|
+
if(WIN32)
|
|
150
|
+
target_compile_definitions(lightning_core_core PUBLIC CJ_PLATFORM_WINDOWS=1)
|
|
151
|
+
elseif(APPLE)
|
|
152
|
+
target_compile_definitions(lightning_core_core PUBLIC CJ_PLATFORM_MACOS=1)
|
|
153
|
+
elseif(UNIX)
|
|
154
|
+
target_compile_definitions(lightning_core_core PUBLIC CJ_PLATFORM_LINUX=1)
|
|
155
|
+
endif()
|
|
156
|
+
|
|
157
|
+
# Add the subdirectories in order, according to the selected build options.
|
|
158
|
+
if(CJ_BUILD_TESTS)
|
|
159
|
+
enable_testing()
|
|
160
|
+
add_subdirectory(tests)
|
|
161
|
+
endif()
|
|
162
|
+
|
|
163
|
+
if(CJ_BUILD_BENCHMARKS)
|
|
164
|
+
add_subdirectory(benchmarks)
|
|
165
|
+
endif()
|
|
166
|
+
|
|
167
|
+
if(CJ_BUILD_PYTHON)
|
|
168
|
+
add_subdirectory(python)
|
|
169
|
+
endif()
|
|
170
|
+
|
|
171
|
+
if(CJ_BUILD_EXAMPLES)
|
|
172
|
+
add_subdirectory(examples)
|
|
173
|
+
endif()
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: lightning-core
|
|
3
|
+
Version: 0.1.2
|
|
4
|
+
Summary: Lightning Core: macOS-first CUDA-style runtime with Metal backend
|
|
5
|
+
Keywords: metal,macos,gpu,runtime,cuda-style,pybind11
|
|
6
|
+
Author: Lightning Core Team
|
|
7
|
+
Maintainer: Lightning Core Team
|
|
8
|
+
License: Proprietary
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: C++
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
21
|
+
Classifier: Operating System :: MacOS :: MacOS X
|
|
22
|
+
Project-URL: Homepage, https://github.com/wnsgus00114-droid/lightining-core
|
|
23
|
+
Project-URL: Repository, https://github.com/wnsgus00114-droid/lightining-core
|
|
24
|
+
Project-URL: Issues, https://github.com/wnsgus00114-droid/lightining-core/issues
|
|
25
|
+
Requires-Python: >=3.9
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# Lightning Core
|
|
29
|
+
|
|
30
|
+
Lightning Core is a macOS-first CUDA-style runtime focused on custom attention training/inference paths.
|
|
31
|
+
|
|
32
|
+
## Quick Start (Beginner)
|
|
33
|
+
|
|
34
|
+
Documentation entrypoint:
|
|
35
|
+
|
|
36
|
+
- docs/index.md
|
|
37
|
+
|
|
38
|
+
Use this path first:
|
|
39
|
+
|
|
40
|
+
1. Install and import-check
|
|
41
|
+
2. Build and run one C API example
|
|
42
|
+
3. Run tests
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
python3 -m pip install .
|
|
46
|
+
python3 -c "import lightning_core; print(lightning_core.backend_name())"
|
|
47
|
+
|
|
48
|
+
cmake -S . -B build -DCJ_ENABLE_METAL=ON -DCJ_BUILD_TESTS=ON -DCJ_BUILD_PYTHON=ON -DCJ_BUILD_EXAMPLES=ON
|
|
49
|
+
cmake --build build -j
|
|
50
|
+
|
|
51
|
+
cmake --build build --target lightning_core_c_api_example -j
|
|
52
|
+
./build/lightning_core_c_api_example
|
|
53
|
+
|
|
54
|
+
ctest --test-dir build --output-on-failure
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Detailed beginner guide:
|
|
58
|
+
|
|
59
|
+
- docs/quickstart.md
|
|
60
|
+
|
|
61
|
+
## Scope (Current)
|
|
62
|
+
|
|
63
|
+
This project is an optimization-focused runtime prototype, not a full deep learning framework.
|
|
64
|
+
|
|
65
|
+
- Core focus: runtime, attention path, selected matrix/vector ops
|
|
66
|
+
- Model-family wrappers are advanced policy/fastpath helpers, not full model implementations
|
|
67
|
+
- API and internals are still actively evolving
|
|
68
|
+
|
|
69
|
+
## Identity and Naming
|
|
70
|
+
|
|
71
|
+
- Public package/module: lightning-core / lightning_core
|
|
72
|
+
- Public C++ include path/namespace: lightning_core/* and lightning_core::...
|
|
73
|
+
- Internal canonical headers: include/lightning_core/core/*
|
|
74
|
+
- Legacy include/cudajun/* remains as compatibility shim
|
|
75
|
+
|
|
76
|
+
## Advanced Topics
|
|
77
|
+
|
|
78
|
+
For advanced usage and operations, see:
|
|
79
|
+
|
|
80
|
+
- docs/advanced.md
|
|
81
|
+
|
|
82
|
+
For contributor workflow and coding conventions, see:
|
|
83
|
+
|
|
84
|
+
- docs/contributor.md
|
|
85
|
+
|
|
86
|
+
The advanced guide (docs/advanced.md) includes:
|
|
87
|
+
|
|
88
|
+
- benchmark sweeps and generated artifacts
|
|
89
|
+
- resident session and policy tuning
|
|
90
|
+
- model-family wrapper examples and caveats
|
|
91
|
+
- runtime profile/env tuning
|
|
92
|
+
- release and publishing workflow notes
|
|
93
|
+
- repository rename transition operations
|
|
94
|
+
|
|
95
|
+
## Build Targets
|
|
96
|
+
|
|
97
|
+
Useful targets:
|
|
98
|
+
|
|
99
|
+
- library: lightning_core::lightning_core
|
|
100
|
+
- python module: lightning_core
|
|
101
|
+
- c api example: lightning_core_c_api_example
|
|
102
|
+
|
|
103
|
+
## Repository Rename Status
|
|
104
|
+
|
|
105
|
+
Current GitHub live URL may still be lightining-core until rename is completed.
|
|
106
|
+
|
|
107
|
+
Use helper script after rename:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
./scripts/sync_remote_after_repo_rename.sh --dry-run
|
|
111
|
+
./scripts/sync_remote_after_repo_rename.sh
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
The script automatically checks target repository availability and skips safely when rename is not ready.
|
|
115
|
+
|
|
116
|
+
## Project Layout
|
|
117
|
+
|
|
118
|
+
- include/lightning_core: public wrappers
|
|
119
|
+
- include/lightning_core/core: canonical internal headers
|
|
120
|
+
- include/cudajun: compatibility shims for legacy integrations
|
|
121
|
+
- src: runtime + tensor + ops implementation
|
|
122
|
+
- tests: C++ unit tests
|
|
123
|
+
- benchmarks: benchmark binaries and sweep scripts
|
|
124
|
+
- python: pybind11 bindings
|
|
125
|
+
- docs: split docs (index/quickstart/advanced/contributor)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# Lightning Core
|
|
2
|
+
|
|
3
|
+
Lightning Core is a macOS-first CUDA-style runtime focused on custom attention training/inference paths.
|
|
4
|
+
|
|
5
|
+
## Quick Start (Beginner)
|
|
6
|
+
|
|
7
|
+
Documentation entrypoint:
|
|
8
|
+
|
|
9
|
+
- docs/index.md
|
|
10
|
+
|
|
11
|
+
Use this path first:
|
|
12
|
+
|
|
13
|
+
1. Install and import-check
|
|
14
|
+
2. Build and run one C API example
|
|
15
|
+
3. Run tests
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
python3 -m pip install .
|
|
19
|
+
python3 -c "import lightning_core; print(lightning_core.backend_name())"
|
|
20
|
+
|
|
21
|
+
cmake -S . -B build -DCJ_ENABLE_METAL=ON -DCJ_BUILD_TESTS=ON -DCJ_BUILD_PYTHON=ON -DCJ_BUILD_EXAMPLES=ON
|
|
22
|
+
cmake --build build -j
|
|
23
|
+
|
|
24
|
+
cmake --build build --target lightning_core_c_api_example -j
|
|
25
|
+
./build/lightning_core_c_api_example
|
|
26
|
+
|
|
27
|
+
ctest --test-dir build --output-on-failure
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Detailed beginner guide:
|
|
31
|
+
|
|
32
|
+
- docs/quickstart.md
|
|
33
|
+
|
|
34
|
+
## Scope (Current)
|
|
35
|
+
|
|
36
|
+
This project is an optimization-focused runtime prototype, not a full deep learning framework.
|
|
37
|
+
|
|
38
|
+
- Core focus: runtime, attention path, selected matrix/vector ops
|
|
39
|
+
- Model-family wrappers are advanced policy/fastpath helpers, not full model implementations
|
|
40
|
+
- API and internals are still actively evolving
|
|
41
|
+
|
|
42
|
+
## Identity and Naming
|
|
43
|
+
|
|
44
|
+
- Public package/module: lightning-core / lightning_core
|
|
45
|
+
- Public C++ include path/namespace: lightning_core/* and lightning_core::...
|
|
46
|
+
- Internal canonical headers: include/lightning_core/core/*
|
|
47
|
+
- Legacy include/cudajun/* remains as compatibility shim
|
|
48
|
+
|
|
49
|
+
## Advanced Topics
|
|
50
|
+
|
|
51
|
+
For advanced usage and operations, see:
|
|
52
|
+
|
|
53
|
+
- docs/advanced.md
|
|
54
|
+
|
|
55
|
+
For contributor workflow and coding conventions, see:
|
|
56
|
+
|
|
57
|
+
- docs/contributor.md
|
|
58
|
+
|
|
59
|
+
The advanced guide (docs/advanced.md) includes:
|
|
60
|
+
|
|
61
|
+
- benchmark sweeps and generated artifacts
|
|
62
|
+
- resident session and policy tuning
|
|
63
|
+
- model-family wrapper examples and caveats
|
|
64
|
+
- runtime profile/env tuning
|
|
65
|
+
- release and publishing workflow notes
|
|
66
|
+
- repository rename transition operations
|
|
67
|
+
|
|
68
|
+
## Build Targets
|
|
69
|
+
|
|
70
|
+
Useful targets:
|
|
71
|
+
|
|
72
|
+
- library: lightning_core::lightning_core
|
|
73
|
+
- python module: lightning_core
|
|
74
|
+
- c api example: lightning_core_c_api_example
|
|
75
|
+
|
|
76
|
+
## Repository Rename Status
|
|
77
|
+
|
|
78
|
+
Current GitHub live URL may still be lightining-core until rename is completed.
|
|
79
|
+
|
|
80
|
+
Use helper script after rename:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
./scripts/sync_remote_after_repo_rename.sh --dry-run
|
|
84
|
+
./scripts/sync_remote_after_repo_rename.sh
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
The script automatically checks target repository availability and skips safely when rename is not ready.
|
|
88
|
+
|
|
89
|
+
## Project Layout
|
|
90
|
+
|
|
91
|
+
- include/lightning_core: public wrappers
|
|
92
|
+
- include/lightning_core/core: canonical internal headers
|
|
93
|
+
- include/cudajun: compatibility shims for legacy integrations
|
|
94
|
+
- src: runtime + tensor + ops implementation
|
|
95
|
+
- tests: C++ unit tests
|
|
96
|
+
- benchmarks: benchmark binaries and sweep scripts
|
|
97
|
+
- python: pybind11 bindings
|
|
98
|
+
- docs: split docs (index/quickstart/advanced/contributor)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Lightning Core Roadmap
|
|
2
|
+
|
|
3
|
+
This roadmap tracks known architecture gaps and compatibility work.
|
|
4
|
+
|
|
5
|
+
## P0: Branding and Surface Consistency
|
|
6
|
+
|
|
7
|
+
- Unify public naming across package, docs, targets, and namespaces.
|
|
8
|
+
- Keep compatibility wrappers for existing `cudajun` users during migration.
|
|
9
|
+
- Ensure metadata and repository URLs always match the canonical repo.
|
|
10
|
+
|
|
11
|
+
## P1: Documentation and Expectation Management
|
|
12
|
+
|
|
13
|
+
- Keep README scope explicit: optimization runtime prototype, not full framework.
|
|
14
|
+
- Separate "implemented now" vs "planned" in docs.
|
|
15
|
+
- Standardize comment/doc tone for external contributors.
|
|
16
|
+
|
|
17
|
+
## P1: Backend Abstraction Cleanup
|
|
18
|
+
|
|
19
|
+
- Remove CUDA-centric internal identifiers where backend-neutral semantics are intended.
|
|
20
|
+
- Clarify memory model contracts for each backend.
|
|
21
|
+
- Reduce hidden backend-specific behavior under generic runtime names.
|
|
22
|
+
- Gradually migrate internal namespace naming from `cudajun` to canonical core naming while preserving compatibility.
|
|
23
|
+
|
|
24
|
+
## P2: Tensor Core Expansion
|
|
25
|
+
|
|
26
|
+
- Add dtype/layout/stride metadata.
|
|
27
|
+
- Add contiguous and view/slice semantics.
|
|
28
|
+
- Add stricter shape/lifetime validation and richer error paths.
|
|
29
|
+
- Split responsibilities over time: storage, metadata, view rules, and reshape/slice validation layers.
|
|
30
|
+
|
|
31
|
+
## P2: Ops Layer Modularization
|
|
32
|
+
|
|
33
|
+
- Split monolithic ops surfaces into:
|
|
34
|
+
- `ops/vector`
|
|
35
|
+
- `ops/matrix`
|
|
36
|
+
- `ops/policy`
|
|
37
|
+
- `ops/session`
|
|
38
|
+
- Preserve high-level helper APIs while reducing header bloat.
|
|
39
|
+
|
|
40
|
+
## P2: Python API Expansion
|
|
41
|
+
|
|
42
|
+
- Expose attention and selected matmul/session APIs.
|
|
43
|
+
- Expose policy controls with safe defaults.
|
|
44
|
+
- Keep minimal onboarding path for first-time users.
|
|
45
|
+
- Keep bindings modular (`bind_tensor`, `bind_ops`, `bind_attention`, `bind_runtime`) for maintainability.
|
|
46
|
+
- Improve numpy/buffer-oriented in-place paths for resident sessions.
|
|
47
|
+
|
|
48
|
+
## P2: Test Depth
|
|
49
|
+
|
|
50
|
+
- Add resident-loop stability tests.
|
|
51
|
+
- Add fuzz/edge-case shape tests.
|
|
52
|
+
- Add policy-combination and memory-lifetime regression tests.
|
|
53
|
+
|
|
54
|
+
## P3: Build Matrix Evolution
|
|
55
|
+
|
|
56
|
+
- Current mode is macOS-first by design.
|
|
57
|
+
- Add optional CPU-only CI profiles for Linux/Windows to prevent accidental lock-in.
|
|
58
|
+
- Keep platform expansion behind explicit build options.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# Vector-add micro-benchmark binary.
|
|
2
|
+
add_executable(bench_vector_add bench_vector_add.cpp)
|
|
3
|
+
target_link_libraries(bench_vector_add PRIVATE lightning_core::lightning_core)
|
|
4
|
+
|
|
5
|
+
# Attention-only benchmark binary.
|
|
6
|
+
add_executable(bench_attention bench_attention.cpp)
|
|
7
|
+
target_link_libraries(bench_attention PRIVATE lightning_core::lightning_core)
|
|
8
|
+
|
|
9
|
+
# Basic matrix-multiplication benchmark binary.
|
|
10
|
+
add_executable(bench_matmul bench_matmul.cpp)
|
|
11
|
+
target_link_libraries(bench_matmul PRIVATE lightning_core::lightning_core)
|
|
12
|
+
|
|
13
|
+
# Element-wise matrix subtraction/division benchmark binary.
|
|
14
|
+
add_executable(bench_matrix_ops bench_matrix_ops.cpp)
|
|
15
|
+
target_link_libraries(bench_matrix_ops PRIVATE lightning_core::lightning_core)
|
|
16
|
+
|
|
17
|
+
# Model-level benchmark binaries.
|
|
18
|
+
add_executable(bench_transformer bench_transformer.cpp)
|
|
19
|
+
target_link_libraries(bench_transformer PRIVATE lightning_core::lightning_core)
|
|
20
|
+
|
|
21
|
+
add_executable(bench_lstm_rnn bench_lstm_rnn.cpp)
|
|
22
|
+
target_link_libraries(bench_lstm_rnn PRIVATE lightning_core::lightning_core)
|
|
23
|
+
|
|
24
|
+
add_executable(bench_cnn_dnn bench_cnn_dnn.cpp)
|
|
25
|
+
target_link_libraries(bench_cnn_dnn PRIVATE lightning_core::lightning_core)
|
|
26
|
+
|
|
27
|
+
add_executable(bench_vlm bench_vlm.cpp)
|
|
28
|
+
target_link_libraries(bench_vlm PRIVATE lightning_core::lightning_core)
|