sklip 0.4.0__tar.gz → 0.5.0__tar.gz
This diff covers publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- {sklip-0.4.0 → sklip-0.5.0}/PKG-INFO +1 -1
- {sklip-0.4.0 → sklip-0.5.0}/pyproject.toml +1 -1
- {sklip-0.4.0 → sklip-0.5.0}/sklip/__init__.py +1 -1
- sklip-0.5.0/sklip/core.py +139 -0
- {sklip-0.4.0 → sklip-0.5.0}/sklip.egg-info/PKG-INFO +1 -1
- sklip-0.4.0/sklip/core.py +0 -42
- {sklip-0.4.0 → sklip-0.5.0}/setup.cfg +0 -0
- {sklip-0.4.0 → sklip-0.5.0}/sklip.egg-info/SOURCES.txt +0 -0
- {sklip-0.4.0 → sklip-0.5.0}/sklip.egg-info/dependency_links.txt +0 -0
- {sklip-0.4.0 → sklip-0.5.0}/sklip.egg-info/requires.txt +0 -0
- {sklip-0.4.0 → sklip-0.5.0}/sklip.egg-info/top_level.txt +0 -0
sklip-0.5.0/sklip/core.py
ADDED
@@ -0,0 +1,139 @@
+import pyperclip
+
+SNIPPETS = {
+    1: "import 4333432numpy as np \nimport pandas as pd \nimport folium \nimport seaborn as sns \nimport matplotlib.pyplot as plt",
+    2: """# Статистический анализ числовых колонок
+numeric_columns = df.select_dtypes(include='number').columns
+for col in numeric_columns:
+    print(f"{col}:")
+    print(f" Медиана: {df[col].median():.2f}")
+    print(f" Среднее: {df[col].mean():.2f}")
+    print(f" Стандартное отклонение: {df[col].std():.2f}")
+    print(f" Асимметрия: {df[col].skew():.2f}")
+    print(f" Эксцесс: {df[col].kurtosis():.2f}")
+    print(f" Минимум: {df[col].min():.2f}")
+    print(f" Максимум: {df[col].max():.2f}")
+    print()""",
+    3: """from statsmodels.graphics.gofplots import qqplot
+from matplotlib import pyplot
+qqplot(df.step_frequency, line='s')
+pyplot.show""",
+    4: """from scipy.stats import shapiro
+from scipy.stats import normaltest
+shapiro(df.step_frequency)""",
+    5: """df['datetime'] = pd.to_datetime(df['datetime'])
+df['hour'] =df['datetime'].dt.hour
+df['time_of_day'] = pd.cut(df['hour'],bins = [0,6,12,18,24],labels=['Ночь','Утро','День','Вечер'],right=False)
+""",
+    6: """plt.figure(figsize=(15,10))
+sns.heatmap(corr_matrix,annot=True)
+corr_matrix = dfcor.corr()""",
+    7: """lan = df['latitude'].mean()
+lon = df['longitude'].mean()
+
+m = folium.Map(location=[lan,lon],zoom_start=18)
+
+
+
+for idx,row in df.iterrows():
+    folium.CircleMarker(row[['latitude','longitude']].values,
+                        color='red',radius = 3, fill = True ).add_to(m)
+m""",
+    8: """ if 'datetime' not in df.columns or df['datetime'].isnull().all():
+    df['datetime'] = pd.date_range('2026-01-01', periods=len(df), freq='1min')
+else:
+    df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce').fillna(
+        pd.Timestamp('2026-01-01')
+    )
+if 'latitude' not in df: df['latitude'] = 51.26 + np.random.normal(0, 0.01, len(df))
+if 'longitude' not in df: df['longitude'] = 10.86 + np.random.normal(0, 0.01, len(df))
+if 'speed' not in df: df['speed'] = np.random.normal(4, 1, len(df)).clip(1, 8)
+
+numeric = ['latitude', 'longitude', 'speed', 'elevation']
+avail_num = [col for col in numeric if col in df.columns]
+scaler = StandardScaler()
+knn = KNNImputer(n_neighbors=5, weights='distance')
+
+df_num = df[avail_num].fillna(0)
+df_scaled = scaler.fit_transform(df_num)
+df_filled = knn.fit_transform(df_scaled)
+df[avail_num] = scaler.inverse_transform(df_filled)
+df['step_frequency'] = np.clip(df['speed'] / 3.6 / 0.75 * 60, 60, 140)
+bins = [0, 500, 1500, 3000, float('inf')]
+labels = ['лес', 'холмы', 'горы', 'альпы']
+df['terrain_type'] = pd.cut(df['elevation'], bins=bins, labels=labels)
+""",
+
+    9: """from sklearn import metrics
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.cluster import KMeans
+from sklearn.cluster import SpectralClustering
+import scipy.cluster.hierarchy as shc
+from scipy.cluster.hierarchy import dendrogram, linkage
+from sklearn.metrics import silhouette_score
+from sklearn.metrics import calinski_harabasz_score
+from sklearn.decomposition import PCA""",
+    10: """
+X = df[['speed', 'elevation', 'step_frequency','temp']]
+from sklearn.preprocessing import StandardScaler
+scaler = StandardScaler()
+X_scaled=scaler.fit_transform(X)""",
+    11: """import plotly.express as px
+plt = px.scatter_3d(X, x='speed', y='elevation', z='step_frequency',
+                    title='K-Means')""",
+    12: """from sklearn.cluster import KMeans
+wcss=[]
+for k in range(1,11):
+    Kmeans = KMeans(n_clusters=k,random_state=42)
+    Kmeans.fit(X_scaled)
+    wcss.append(Kmeans.inertia_)
+""",
+    13: """sns.lineplot(x=range(1,11), y=wcss, marker='o')""",
+    14: """cluster = KMeans(n_clusters=3)
+kmeans = cluster.fit_predict(X_scaled)""",
+    15: """
+plt.figure(figsize=(20, 15), dpi=100)
+scatter=plt.scatter(X_scaled[:,0],X_scaled[:,1],c=kmeans)
+plt.legend(*scatter.legend_elements(), title="Кластеры")""",
+    16: """from sklearn.cluster import DBSCAN
+DbscanCluster = DBSCAN(eps=0.5, min_samples=20)
+dbscan = DbscanCluster.fit_predict(X_scaled)""",
+    17: """from sklearn.mixture import GaussianMixture
+gmm = GaussianMixture(n_components=2, random_state=0)
+gmm_pred = gmm.fit_predict(X_scaled)""",
+    18: """harabKmeans = metrics.calinski_harabasz_score(X_scaled, kmeans)
+metrics.calinski_harabasz_score(X_scaled, kmeans)""",
+    19: """silhKmeans = metrics.silhouette_score(X_scaled, kmeans)
+metrics.silhouette_score(X_scaled, kmeans)""",
+    20: """metrics = ['Kmeans', 'dbscan','gmm_pred']
+scores = [harabKmeans, harabDBscan,harabgmm_pred]
+plt.bar(metrics, scores, color=['blue', 'green'])
+for i, v in enumerate(scores):
+    plt.text(i, v, f'{v:.2f}', ha='center', va='bottom')""",
+    21: """GEOPOINT(FLOAT([Широта]), FLOAT([Долгота]))
+MARKUP(
+    "ID точки: ", [ID точки], BR(),
+    "Трек: ", [ID трека], BR(),
+    "Время: ", [Дататайм], BR(),
+    "Скорость: ", [Скорость], " км/ч", BR(),
+    "Температура: ", [Температура ], "°C"
+)""",
+}
+
+def s(id: int = 1) -> bool:
+
+    text = SNIPPETS.get(id, SNIPPETS[1])
+    pyperclip.copy(text)
+def ClusterKMeans(id: int = 1) -> bool:
+
+    text = SNIPPETS.get(id, SNIPPETS[1])
+    pyperclip.copy(text)
+
+def figsize(id: int = 1, id2: int = 1) -> bool:
+    text = SNIPPETS.get(id, SNIPPETS[1])
+    pyperclip.copy(text)
+
+def x(id: int = 1) -> bool:
+
+    text = SNIPPETS.get(id, SNIPPETS[1])
+    pyperclip.copy(text)
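In 0.5.0 the module keeps the same public surface as 0.4.0: four helpers (s, ClusterKMeans, figsize, x), each of which looks up a snippet by id in SNIPPETS, falls back to SNIPPETS[1] for unknown ids, and copies the text to the system clipboard via pyperclip (despite the -> bool annotations, they return None). The snippets themselves are meant to be pasted into a notebook session that already defines names such as df and X_scaled. A minimal usage sketch, assuming only that pyperclip can reach a clipboard backend on the host:

```python
# Minimal usage sketch, assuming a working clipboard backend for pyperclip
# (on a headless Linux box this typically means installing xclip or xsel).
from sklip.core import s, x

s(2)     # copies snippet 2, the per-column descriptive-statistics loop, to the clipboard
x(12)    # copies snippet 12, the K-Means elbow (WCSS) loop
s(999)   # any unknown id falls back to SNIPPETS[1], the import block
```

Nothing is executed by these calls; they only populate the clipboard, so the pasted snippet still runs (or fails) in whatever environment receives it.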
sklip-0.4.0/sklip/core.py
DELETED
@@ -1,42 +0,0 @@
-import pyperclip
-
-SNIPPETS = {
-    1: "import 4333432numpy as np \n import pandas as pd \n import folium \n import seaborn as sns \n import matplotlib.pyplot as plt",
-    2: "import pandas as pd",
-    3: "import matplotlib.pyplot as plt",
-    4: "from sklearn.cluster import KMeans",
-    5: "from sklearn.decomposition import PCA",
-    6: "from sklearn.metrics import silhouette_score",
-    7: "from sklearn.cluster import AgglomerativeClustering",
-    8: "from sklearn.cluster import SpectralClustering",
-    9: "from scipy.cluster.hierarchy import dendrogram, linkage",
-    10: "import seaborn as sns",
-    11: "warnings.filterwarnings('ignore')",
-    12: "X = np.random.randn(100, 2)",
-    13: "kmeans = KMeans(n_clusters=3, random_state=42)",
-    14: "labels = kmeans.fit_predict(X)",
-    15: "pca = PCA(n_components=2)",
-    16: "plt.figure(figsize=(10, 7))",
-    17: "df = pd.read_csv('data.csv')",
-    18: "from sklearn.metrics import calinski_harabasz_score",
-    19: "import scipy.cluster.hierarchy as shc",
-    20: "np.random.seed(42)"
-}
-
-def s(id: int = 1) -> bool:
-
-    text = SNIPPETS.get(id, SNIPPETS[1])
-    pyperclip.copy(text)
-def ClusterKMeans(id: int = 1) -> bool:
-
-    text = SNIPPETS.get(id, SNIPPETS[1])
-    pyperclip.copy(text)
-
-def figsize(id: int = 1, id2: int = 1) -> bool:
-    text = SNIPPETS.get(id, SNIPPETS[1])
-    pyperclip.copy(text)
-
-def x(id: int = 1) -> bool:
-
-    text = SNIPPETS.get(id, SNIPPETS[1])
-    pyperclip.copy(text)
The remaining files (setup.cfg and the sklip.egg-info metadata files) are unchanged between 0.4.0 and 0.5.0.