buildz-gpu 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. buildz_gpu-0.1.0/MANIFEST.in +2 -0
  2. buildz_gpu-0.1.0/PKG-INFO +21 -0
  3. buildz_gpu-0.1.0/README.md +9 -0
  4. buildz_gpu-0.1.0/buildz/gpu/az/__init__.py +13 -0
  5. buildz_gpu-0.1.0/buildz/gpu/az/attrn.py +42 -0
  6. buildz_gpu-0.1.0/buildz/gpu/az/conv.py +50 -0
  7. buildz_gpu-0.1.0/buildz/gpu/az/ln.py +32 -0
  8. buildz_gpu-0.1.0/buildz/gpu/az/nets.py +25 -0
  9. buildz_gpu-0.1.0/buildz/gpu/az/nreshape.py +12 -0
  10. buildz_gpu-0.1.0/buildz/gpu/az/reshape.py +16 -0
  11. buildz_gpu-0.1.0/buildz/gpu/az/ups.py +40 -0
  12. buildz_gpu-0.1.0/buildz/gpu/az/util.py +45 -0
  13. buildz_gpu-0.1.0/buildz/gpu/demo/az.js +22 -0
  14. buildz_gpu-0.1.0/buildz/gpu/demo/az.py +72 -0
  15. buildz_gpu-0.1.0/buildz/gpu/demo/cal_cpu.py +44 -0
  16. buildz_gpu-0.1.0/buildz/gpu/demo/cal_gpu.py +44 -0
  17. buildz_gpu-0.1.0/buildz/gpu/demo/trans.py +34 -0
  18. buildz_gpu-0.1.0/buildz/gpu/demo/trans2.py +32 -0
  19. buildz_gpu-0.1.0/buildz/gpu/test/demo.py +168 -0
  20. buildz_gpu-0.1.0/buildz/gpu/test/test_recal.py +109 -0
  21. buildz_gpu-0.1.0/buildz/gpu/torch/__init__.py +9 -0
  22. buildz_gpu-0.1.0/buildz/gpu/torch/dv.py +121 -0
  23. buildz_gpu-0.1.0/buildz/gpu/torch/middle_base.py +102 -0
  24. buildz_gpu-0.1.0/buildz/gpu/torch/middle_cache.py +184 -0
  25. buildz_gpu-0.1.0/buildz/gpu/torch/recal.py +124 -0
  26. buildz_gpu-0.1.0/buildz/none +0 -0
  27. buildz_gpu-0.1.0/buildz_gpu.egg-info/PKG-INFO +21 -0
  28. buildz_gpu-0.1.0/buildz_gpu.egg-info/SOURCES.txt +31 -0
  29. buildz_gpu-0.1.0/buildz_gpu.egg-info/dependency_links.txt +1 -0
  30. buildz_gpu-0.1.0/buildz_gpu.egg-info/requires.txt +1 -0
  31. buildz_gpu-0.1.0/buildz_gpu.egg-info/top_level.txt +1 -0
  32. buildz_gpu-0.1.0/setup.cfg +4 -0
  33. buildz_gpu-0.1.0/setup.py +23 -0
@@ -0,0 +1,2 @@
1
+ include README.md
2
+ recursive-include buildz *
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.1
2
+ Name: buildz_gpu
3
+ Version: 0.1.0
4
+ Summary: 用python写的gpu模型训练相关工具, buildz包的一部分(buildz.gpu)
5
+ Home-page: https://github.com/buildCodeZ/buildz
6
+ Author: Zzz
7
+ Author-email: 1309458652@qq.com
8
+ License: Apache License 2.0
9
+ Keywords: buildz
10
+ Platform: any
11
+ Description-Content-Type: text/markdown
12
+
13
+ # buildz
14
+ 声明:
15
+ 禁止将本项目代码用于ai训练
16
+ declaration:
17
+ Codes of this project are not allowed to be used for AI training or any other form of machine learning processes.
18
+
19
+ ```
20
+ 用python写的gpu模型训练相关工具
21
+ ```
@@ -0,0 +1,9 @@
1
+ # buildz
2
+ 声明:
3
+ 禁止将本项目代码用于ai训练
4
+ declaration:
5
+ Codes of this project are not allowed to be used for AI training or any other form of machine learning processes.
6
+
7
+ ```
8
+ 用python写的gpu模型训练相关工具
9
+ ```
@@ -0,0 +1,13 @@
1
# Public API of buildz.gpu.az: analytical cost models for common layers.

from .attrn import MultiAttrn
from .conv import Conv
from .ups import Upsample
from .ln import Linear
from .reshape import Reshape
from .nets import Nets
from .util import *

__doc__="""
Analysis tools: analytical size / FLOP / activation-cache models.
"""
@@ -0,0 +1,42 @@
1
+
2
from buildz.base import Base
from .util import *
class MultiAttrn(Base):
    """Analytical cost model for one multi-head attention layer.

    Estimates parameter count, activation-cache size and FLOPs for an
    attention block mapping `din` -> `dout` with `num_heads` heads.
    NOTE(review): assumes `Base` routes __init__ args to `init()` --
    confirm against buildz.base.
    """
    def init(self, din, dout, num_heads, bias=True):
        self.din=din
        self.dout=dout
        self.num_heads=num_heads
        # Per-head width; assumes dout is divisible by num_heads.
        self.dout_per_head = dout//num_heads
        self.bias=bias
    def fetch(self, shape):
        # Normalize `shape` to (batch, seq_n, din); a bare int or a
        # short sequence is left-padded with 1s.
        if type(shape)==int:
            shape = [shape]
        shape = list(shape)
        while len(shape)<3:
            shape = [1]+shape
        din = shape[2]
        seq_n = shape[1]
        batch = shape[0]
        return batch, seq_n, din
    def cache(self, shape, unit=1):
        # Cached activation elements (times `unit` bytes/element):
        # the three q/k/v projections plus two seq x seq weight maps.
        batch, seq_n, din = self.fetch(shape)
        n_input = 3*batch*seq_n*din
        n_w = 2*batch*self.num_heads*seq_n*seq_n
        n = n_input+n_w
        return n*unit
    def cal(self, shape):
        # Forward FLOPs: q/k/v projections (3 matmuls, 2 ops each),
        # attention scores + weighted sum, and the output projection.
        batch, seq_n, din = self.fetch(shape)
        n = 6*batch*seq_n*din*self.dout+2*batch*self.num_heads*seq_n*seq_n*self.dout_per_head+2*batch*seq_n*self.dout*self.dout
        return n
    def backcal(self, shape):
        # Backward pass is modelled as twice the forward cost.
        return 2*self.cal(shape)
    def size(self, unit=1):
        # Parameter count: 3 projection matrices (+bias) plus one
        # output matrix (+bias); `bias` (bool) contributes 0 or 1.
        kqv = 3*(self.din*self.dout+self.dout*self.bias)
        out = self.dout*self.dout+self.dout*self.bias
        n = kqv+out
        return n*unit
    def out(self, shape):
        # Shape produced by the layer: last dim becomes dout.
        batch, seq_n, din = self.fetch(shape)
        return [batch, seq_n, self.dout]

    pass
@@ -0,0 +1,50 @@
1
+
2
from buildz.base import Base
from .util import *
class Conv(Base):
    """Analytical cost model for an N-dimensional convolution.

    Shapes are [batch, channel, dim1, dim2, ...].
    """
    def init(self, dims, ch_in, ch_out, kernel, bias=True, stride=1, padding=0):
        self.ch_in = ch_in
        self.ch_out = ch_out
        self.dims = dims
        self.bias = bias
        # exp() broadcasts a scalar to one value per spatial dimension.
        self.kernel = exp(kernel, dims)
        self.stride = exp(stride, dims)
        self.padding = exp(padding, dims)
    def fetch(self, shape):
        # Normalize to [batch, channel, *spatial]; left-pad with 1s.
        if type(shape) == int:
            shape = [shape]
        shape = list(shape)
        while len(shape) < self.dims+2:
            shape = [1]+shape
        batch = shape[0]
        ch = shape[1]
        return batch, ch, shape[2:]
    def out(self, shape):
        # Output shape via the standard conv size formula:
        # floor((in - k + 2p) / s) + 1 per spatial dim.
        batch, ch, szs = self.fetch(shape)
        outs = [batch, self.ch_out]
        for kernel_size, stride, padding, dim_in in zip(self.kernel, self.stride, self.padding, szs):
            val = int((dim_in - kernel_size + 2*padding)/stride)+1
            outs.append(val)
        return outs
    def cache(self, shape, unit=1):
        # Cached elements: input activations plus the weight tensor.
        batch, ch, szs = self.fetch(shape)
        n_input = batch*ch*mul(szs)
        n_w = self.ch_out*self.ch_in*mul(self.kernel)
        n = n_input+n_w
        return n*unit
    def cal(self, shape):
        # Forward FLOPs: 2 * batch * out_spatial * ch_out * ch_in * kernel.
        # Fixed: the original computed self.out(shape) but then read the
        # *input* spatial sizes (shape[2:]) and dropped the batch factor.
        out = self.out(shape)
        batch = out[0]
        out_szs = out[2:]
        n = 2*batch*mul(out_szs)*self.ch_out*self.ch_in*mul(self.kernel)
        return n
    def backcal(self, shape):
        # Backward modelled as twice forward.  Signature aligned with
        # the sibling classes (was `backcal(self, batch=1)`).
        return 2*self.cal(shape)
    def size(self, unit=1):
        # Weights plus optional per-output-channel bias (bool -> 0/1).
        n = self.ch_in*self.ch_out*mul(self.kernel)+self.bias*self.ch_out
        return n*unit
@@ -0,0 +1,32 @@
1
+
2
from buildz.base import Base
from .util import *
class Linear(Base):
    """Analytical cost model for a fully connected (linear) layer."""
    def init(self, din, dout, bias=True):
        self.din = din
        self.dout = dout
        self.bias = bias
    def fetch(self, shape):
        # Collapse all leading dimensions into a single batch size.
        if type(shape) == int:
            shape = [shape]
        shape = list(shape)
        if len(shape) < 2:
            shape = [1] + shape
        return mul(shape[:-1]), shape[-1]
    def cache(self, shape, unit=1):
        # Cached elements: input activations plus the weight matrix.
        batch, din = self.fetch(shape)
        elems = batch*self.din + self.din*self.dout
        return elems * unit
    def cal(self, shape):
        # One matmul: 2 ops per multiply-accumulate.
        batch, din = self.fetch(shape)
        return 2 * batch * self.din * self.dout
    def backcal(self, shape):
        # Backward modelled as twice forward.
        return self.cal(shape) * 2
    def size(self, unit=1):
        # Weights plus optional bias vector (bool -> 0/1).
        params = self.din*self.dout + self.bias*self.dout
        return params * unit
    def out(self, shape):
        # Leading dims are flattened into one batch dim.
        batch, din = self.fetch(shape)
        return [batch, self.dout]
@@ -0,0 +1,25 @@
1
+
2
from buildz.base import Base
from .util import *
class Nets(Base):
    """Chains cost models: sums a metric over the layers while feeding
    each layer's output shape into the next layer."""
    def init(self, *nets):
        self.nets = nets
    def call(self, fc, shape, *a, **b):
        # Accumulate `fc` over all layers, threading the shape through.
        total = 0
        for net in self.nets:
            total += getattr(net, fc)(shape, *a, **b)
            shape = net.out(shape)
        return total
    def cache(self, shape, unit=1):
        return self.call("cache", shape, unit)
    def cal(self, shape):
        return self.call("cal", shape)
    def backcal(self, shape):
        return self.call("backcal", shape)
    def size(self, unit=1):
        # Parameter size is shape-independent, so no threading needed.
        return sum(net.size(unit) for net in self.nets)
    def out(self, shape):
        # Shape after passing through the whole chain.
        for net in self.nets:
            shape = net.out(shape)
        return shape
@@ -0,0 +1,12 @@
1
+
2
from torch import nn


class ReshapeModule(nn.Module):
    """nn.Module that reshapes its input to a fixed target shape."""

    def __init__(self, shape):
        super().__init__()
        self.shape = shape

    def forward(self, inputs):
        return inputs.reshape(self.shape)


def reshape(shape):
    """Factory: build a ReshapeModule targeting `shape`."""
    return ReshapeModule(shape)
@@ -0,0 +1,16 @@
1
+
2
from buildz.base import Base
from .util import *
class Reshape(Base):
    """Cost model for a reshape: free in compute, cache and params."""
    def init(self, shape):
        self.shape = shape
    def cache(self, shape, unit=1):
        # A reshape caches nothing.
        return 0
    def cal(self, shape):
        # No arithmetic is performed.
        return 0
    def backcal(self, shape):
        return 0
    def size(self, unit=1):
        # No learnable parameters.
        return 0
    def out(self, shape):
        # Always yields the configured target shape.
        return self.shape
@@ -0,0 +1,40 @@
1
+
2
from buildz.base import Base
from .util import *
class Upsample(Base):
    """Cost model for an upsampling layer ([batch, ch, *spatial])."""
    def init(self, dims, size=None, scale_factor=None, mode='nearest'):
        self.dims = dims
        # Like torch.nn.Upsample: either a target size or a scale
        # factor must be provided.
        assert size is not None or scale_factor is not None
        self.size = exp(size, dims)
        self.scale_factor = exp(scale_factor, dims)
        self.mode = mode
    def fetch(self, shape):
        # Normalize to [batch, channel, *spatial]; left-pad with 1s.
        if type(shape) == int:
            shape = [shape]
        shape = list(shape)
        while len(shape) < self.dims+2:
            shape = [1]+shape
        batch = shape[0]
        ch = shape[1]
        return batch, ch, shape[2:]
    def cache(self, shape, unit=1):
        # Nearest-neighbour needs no cached activations; other modes
        # are modelled as caching the full input.
        if self.mode == 'nearest':
            return 0
        n = mul(shape)
        return n*unit
    def cal(self, shape):
        # ~4 ops per produced output element.
        oshape = self.out(shape)
        batch, ch, oszs = self.fetch(oshape)
        n = 4*batch*ch*mul(oszs)
        return n
    def backcal(self, shape):
        # Fixed: the original called self.cal(batch) with an undefined
        # name `batch` (NameError).  Backward is modelled as forward.
        return self.cal(shape)
    def size(self, unit=1):
        # Upsampling has no learnable parameters.
        return 0
    def out(self, shape):
        batch, ch, szs = self.fetch(shape)
        if self.size:
            osz = list(self.size)
        else:
            # Fixed: the original zipped over self.size (None on this
            # branch) and *divided* by the factor; upsampling scales
            # each input size UP by scale_factor (floored, as torch
            # nn.Upsample does).
            osz = [int(sz*sf) for sz, sf in zip(szs, self.scale_factor)]
        return [batch, ch]+osz
@@ -0,0 +1,45 @@
1
+
2
+
3
def mul(arr):
    """Product of the elements of a list/tuple; any other value is
    returned unchanged (a scalar is its own product)."""
    if type(arr) not in {list, tuple}:
        return arr
    product = 1
    for factor in arr:
        product *= factor
    return product
10
+
11
def exp(val, dims):
    """Broadcast a scalar to a list of length `dims`.

    Lists and tuples pass through untouched; None stays None.
    """
    if val is None:
        return None
    if type(val) in {list, tuple}:
        return val
    return [val]*dims
17
+
18
def format_size(n, unit=1024):
    """Render a count as a human-readable string with 3 decimals,
    e.g. 1536 -> '1.500K' (suffixes step by `unit`)."""
    suffixes = ",K,M,G,T,P,E,Z,Y".split(",")
    idx = 0
    # Divide down until n fits under one unit or suffixes run out.
    while n >= unit and idx < len(suffixes)-1:
        n = n/unit
        idx += 1
    return f"{n:.3f}{suffixes[idx]}"
26
+
27
def nsize(fmt, unit=1024):
    """Parse a human-readable size such as '10T' or '1.5K' into a number.

    An optional one-letter suffix (K/M/G/T/P/E/Z/Y) multiplies the
    numeric prefix by the matching power of `unit`.  Generalized: the
    suffix is now accepted case-insensitively ('10k' == '10K'); a bare
    number parses as-is.  Returns a float.
    """
    fmt = fmt.strip()
    units = ",K,M,G,T,P,E,Z,Y".split(",")
    # Map suffix -> multiplier ("" -> 1, "K" -> unit, "M" -> unit**2, ...).
    maps = {}
    val = 1
    for k in units:
        maps[k] = val
        val = val*unit
    k = fmt[-1].upper()
    if k not in maps:
        # No recognized suffix: the whole string is the number.
        k = ""
    val = float(fmt[:len(fmt)-len(k)])
    return val*maps[k]
40
+
41
# Short public aliases for the helpers above.
ns = nsize
fmt_sz = format_size
fmt_size = format_size
@@ -0,0 +1,22 @@
1
+
2
+ cal:{
3
+ // theoretical peak values
4
+ // RTX4060: 15T
5
+ // i7-13700H: 657G
6
+ // measured values
7
+ RTX4060: 12T
8
+ gpu: 12T
9
+ i7-13700H: 500G
10
+ cpu: 500G
11
+ }
12
+ trans: {
13
+ gpu: 272G
14
+ // PCIe 4.0 x8 = 2G*8, theoretical value
15
+ // gpu_mem: 16G
16
+ // measured value; requires pin_memory=True (e.g. torch.randn(..., pin_memory=True)) so host memory is page-locked and transfers can reach full speed
17
+ //
18
+ gpu2mem: 10G
19
+ // real training data is scattered across host memory, so copies to VRAM are slower
20
+ gpu2mem: 1.6G
21
+ wifi6: 1.2G
22
+ }
@@ -0,0 +1,72 @@
1
+
2
+
3
+
4
+ from buildz.gpuz.az import *
5
+ # from buildz.gpuz.az.nreshape import reshape
6
+
7
+ from buildz import xf, fz
8
+
9
+ # from torch import nn
10
+ # import torch
11
import os
# Resolve az.js, which ships next to this module, and load the
# measured hardware numbers from it (xf.loadf parses the config).
dp = os.path.dirname(__file__)
fp = os.path.join(dp, "az.js")
conf = xf.loadf(fp)
speed = conf.get("cal", {})
# Compute throughput (ops/sec); defaults used when a key is absent.
speed_gpu = nsize(speed.get("gpu", "10T"))
speed_cpu = nsize(speed.get("cpu", "500G"))
trans = conf.get("trans", {})
# Host-to-GPU copy bandwidth (bytes/sec) for real training data.
trans_mem = nsize(trans.get("gpu2mem", "1G"))
20
def unit(din):
    """One transformer-style block for the cost model: multi-head
    attention (8 heads) followed by a linear layer, both din -> din."""
    return [
        MultiAttrn(din, din, 8),
        Linear(din, din),
    ]
29
+
30
def conv_unit(ksize, ch_in, ch_out, reshape):
    """Cost-model layers for: reshape to an image, run a 'same'-padded
    2-d conv, reshape back, then a linear layer of the module-level
    width `din`.  Returns the list of layers."""
    nets = []
    nets.append(Reshape(reshape))
    # Fixed: the original tested `type(ksize) != {list, tuple}`, which
    # is always True (a type never equals a set), so list kernels were
    # wrapped into [[k,k],[k,k]].
    if type(ksize) not in (list, tuple):
        ksize = [ksize, ksize]
    # Positional args: bias=1 (truthy), stride=1, padding=k//2 ('same').
    nets.append(Conv(2, ch_in, ch_out, ksize, 1, 1, [k//2 for k in ksize]))
    batch, ch, w, h = reshape
    outshape = [batch, ch_out, w, h]
    nets.append(Reshape(outshape))
    # NOTE(review): `din` is the module-level global, not derived from
    # this unit's shapes -- confirm that is intended.
    nets.append(Linear(din, din))
    return nets
43
+
44
din =4096
# din=256
batch=1
seq_n=1024
az_nets = []
# Stack 10 conv+attention units into one analytical model.
for i in range(10):
    az_nets+=conv_unit(3, 4, 4, [batch, 4, 32, 32])
    az_nets+=unit(din)

az_nets = Nets(*az_nets)
data_shape = [batch, din]

# All byte sizes below assume 4 bytes per element (float32).
data_size = mul(data_shape)*4
data_trans = data_size/trans_mem
print(f"data: {fmt_sz(data_size)}, data_trans: {data_trans:.3f} sec")
net_size = az_nets.size(4)
net_trans = net_size/trans_mem
print(f"net size: {fmt_sz(net_size)}, net_trans: {net_trans:.3f} sec")
cache_size = az_nets.cache(data_shape,4)
cache_trans = cache_size/trans_mem
print(f"cache: {fmt_sz(cache_size)}, cache_trans: {cache_trans:.3f} sec")
# Predicted compute time on GPU vs CPU from the configured speeds.
cal_amount = az_nets.cal(data_shape)
cal_gpu = cal_amount/speed_gpu
cal_cpu = cal_amount/speed_cpu
print(f"cal: {fmt_sz(cal_amount)}, cal_gpu: {cal_gpu:.3f} sec, cal_cpu: {cal_cpu:.3f} sec")
print("done")
"""
python -m buildz.gpuz.demo.az
"""
@@ -0,0 +1,44 @@
1
+
2
+
3
+
4
# CPU matmul throughput benchmark: run a stack of large Linear layers
# on the CPU and compare the measured speed against the analytical
# FLOP count from the az cost models.
from buildz.gpuz.az import *


from buildz import xf, fz

from torch import nn
import torch
din =4096*2
dout = 4096*2
batch=10240
az_nets = []
nets = []
# Build the cost model and the real torch network in parallel.
for i in range(5):
    az_nets.append(Linear(din,dout))
    nets.append(nn.Linear(din,dout))
az_nets = Nets(*az_nets)
nets = nn.Sequential(*nets)
data = torch.randn(batch, din)
print(f"data: {fmt_sz(batch*din)}")
print(f"net size: {fmt_sz(az_nets.size(4))}")
print(f"cache: {fmt_sz(az_nets.cache(data.size(),4))}")
print(f"cal: {fmt_sz(az_nets.cal(data.size()))}")
print("done")
import time
# nets=nets.cuda()
# data=data.cuda()
loop=5
out = data
curr=time.time()
# Inference only: no_grad avoids autograd bookkeeping in the timing.
with torch.no_grad():
    for i in range(loop):
        out = nets(out)
sec = time.time()-curr
print(f"time cost: {sec}/{loop}")
assert sec>0
speed = az_nets.cal(data.size())*loop/(sec+1e-10)
print(f"speed: {fmt_sz(speed)}")

"""
python -m buildz.gpuz.demo.cal_cpu
"""
@@ -0,0 +1,44 @@
1
+
2
+
3
+
4
# GPU matmul throughput benchmark: same stack as cal_cpu.py but moved
# to CUDA, with more iterations since the GPU is far faster.
from buildz.gpuz.az import *


from buildz import xf, fz

from torch import nn
import torch
din =4096*2
dout = 4096*2
batch=10240
az_nets = []
nets = []
# Build the cost model and the real torch network in parallel.
for i in range(5):
    az_nets.append(Linear(din,dout))
    nets.append(nn.Linear(din,dout))
az_nets = Nets(*az_nets)
nets = nn.Sequential(*nets)
data = torch.randn(batch, din)
print(f"data: {fmt_sz(batch*din)}")
print(f"net size: {fmt_sz(az_nets.size(4))}")
print(f"cache: {fmt_sz(az_nets.cache(data.size(),4))}")
print(f"cal: {fmt_sz(az_nets.cal(data.size()))}")
print("done")
import time
nets=nets.cuda()
data=data.cuda()
loop=200
out = data
curr=time.time()
# Inference only: no_grad avoids autograd bookkeeping in the timing.
with torch.no_grad():
    for i in range(loop):
        out = nets(out)
sec = time.time()-curr
print(f"time cost: {sec}/{loop}")
assert sec>0
speed = az_nets.cal(data.size())*loop/(sec+1e-10)
print(f"speed: {fmt_sz(speed)}")

"""
python -m buildz.gpuz.demo.cal_gpu
"""
@@ -0,0 +1,34 @@
1
+
2
+
3
+
4
# Host-to-device transfer benchmark: copy `loop` distinct pinned
# tensors to the GPU and report the achieved bandwidth.
from buildz.gpuz.az import *


from buildz import xf, fz

from torch import nn
import torch
din =4096*4
batch=10240
loop=6
# data = torch.randn(batch, din, pin_memory=True)requires_grad=False
# Pinned (page-locked) host memory is needed to approach PCIe peak.
datas = [torch.randn(batch, din, pin_memory=True) for i in range(loop)]
n = batch*din
# Bytes per tensor: 4 bytes/element (torch default float32).
size = n*4
print(f"data: {fmt_sz(batch*din*loop)}")
import time
curr=time.time()
for i in range(loop):
    data = datas[i]
    data=data.cuda()
    # Drop the device copy so each iteration transfers a fresh tensor.
    del data
    # data=data.cpu()
sec = time.time()-curr
print(f"time cost: {sec}/{loop}")
assert sec>0
speed = size*loop/(sec+1e-10)
print(f"speed: {fmt_sz(speed)}")

"""
python -m buildz.gpuz.demo.trans
"""
@@ -0,0 +1,32 @@
1
+
2
+
3
+
4
# Round-trip transfer benchmark: repeatedly copy one pinned tensor to
# the GPU and back, measuring the effective one-way bandwidth.
from buildz.gpuz.az import *


from buildz import xf, fz

from torch import nn
import torch
din =4096*4
batch=10240
loop=10
# pin_memory gives page-locked host memory for fast DMA transfers.
data = torch.randn(batch, din, pin_memory=True)
n = batch*din
# Bytes per tensor: 4 bytes/element (torch default float32).
size = n*4
print(f"data: {fmt_sz(batch*din*loop)}")
import time
curr=time.time()
for i in range(loop):
    data = data
    data=data.cuda()
    # .contiguous() forces the copy back into one dense host buffer.
    data=data.cpu().contiguous()
sec = time.time()-curr
print(f"time cost: {sec}/{loop}")
assert sec>0
speed = size*loop/(sec+1e-10)
print(f"speed: {fmt_sz(speed)}")

"""
python -m buildz.gpuz.demo.trans2
"""