cry-bizisi-parser 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.txt +148 -0
- package/cli.ts +58 -0
- package/dist/bizisi.d.ts +29 -0
- package/dist/bizisi.d.ts.map +1 -0
- package/dist/bizisi.js +623 -0
- package/dist/bizisi.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -0
- package/dist/types.d.ts +66 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +35 -0
- package/src/bizisi.ts +720 -0
- package/src/index.ts +2 -0
- package/src/types.ts +71 -0
package/README.txt
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
==============================================================================
|
|
2
|
+
bizi.si parser — Bun knjižnica za zajem podatkov s portala bizi.si
|
|
3
|
+
==============================================================================
|
|
4
|
+
|
|
5
|
+
Avtor: bizisi-parser
|
|
6
|
+
Zahteve: Bun 1.3+
|
|
7
|
+
Odvisnosti: tesseract.js (za OCR imen oseb)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
1. NAMESTITEV
|
|
11
|
+
==============================================================================
|
|
12
|
+
|
|
13
|
+
bun install
|
|
14
|
+
|
|
15
|
+
Prvi zagon bo prenesel še ~15MB jezikovnih podatkov za OCR (slovenščina).
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
2. UPORABA IZ UKAZNE VRSTICE
|
|
19
|
+
==============================================================================
|
|
20
|
+
|
|
21
|
+
# Iskanje podjetja (brez prijave — javni podatki)
|
|
22
|
+
bun run cli.ts "animalia"
|
|
23
|
+
|
|
24
|
+
# Omejitev števila zadetkov
|
|
25
|
+
bun run cli.ts "animalia" --max 2
|
|
26
|
+
|
|
27
|
+
# S prijavo (za finančne podatke)
|
|
28
|
+
BIZISI_USERNAME="uporabnik" BIZISI_PASSWORD="geslo" bun run cli.ts "animalia" --login
|
|
29
|
+
|
|
30
|
+
Primeri:
|
|
31
|
+
|
|
32
|
+
bun run cli.ts "6812465000" # iskanje po matični številki
|
|
33
|
+
bun run cli.ts "59325151" # iskanje po davčni številki
|
|
34
|
+
bun run cli.ts "ETVETERA" --login # s prijavo
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
3. UPORABA IZ KODE
|
|
38
|
+
==============================================================================
|
|
39
|
+
|
|
40
|
+
import { BizisiScraper } from "./src/index";
|
|
41
|
+
|
|
42
|
+
const scraper = new BizisiScraper();
|
|
43
|
+
|
|
44
|
+
// Iskanje + zajem (brez prijave)
|
|
45
|
+
const rezultati = await scraper.scrape("animalia");
|
|
46
|
+
console.log(JSON.stringify(rezultati, null, 2));
|
|
47
|
+
|
|
48
|
+
// S prijavo (za finančne podatke)
|
|
49
|
+
const scraper2 = new BizisiScraper();
|
|
50
|
+
await scraper2.login("uporabnik", "geslo");
|
|
51
|
+
// ali: await scraper2.login(); // bere BIZISI_USERNAME in BIZISI_PASSWORD iz okolja
|
|
52
|
+
const data = await scraper2.scrape("etvetera", { includeFinancialData: true });
|
|
53
|
+
console.log(JSON.stringify(data, null, 2));
|
|
54
|
+
|
|
55
|
+
// Samo iskanje (brez detajlov)
|
|
56
|
+
const zadetki = await scraper.search("6812465000");
|
|
57
|
+
|
|
58
|
+
// Samo detajli podjetja
|
|
59
|
+
const detajli = await scraper.getCompanyDetail("/ETVETERA-D-O-O/");
|
|
60
|
+
|
|
61
|
+
// Samo finančni podatki
|
|
62
|
+
const finance = await scraper2.getFinancialData("/ETVETERA-D-O-O/"); // zahteva prijavo, zato scraper2
|
|
63
|
+
|
|
64
|
+
// Počisti OCR worker (obvezno na koncu!)
|
|
65
|
+
await scraper.close();
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
4. PODATKI, KI JIH VRNE
|
|
69
|
+
==============================================================================
|
|
70
|
+
|
|
71
|
+
CompanyDetail:
|
|
72
|
+
┌─────────────────────┬─────────────────────────────────────────────────┐
|
|
73
|
+
│ Polje │ Opis │
|
|
74
|
+
├─────────────────────┼─────────────────────────────────────────────────┤
|
|
75
|
+
│ name │ Naziv podjetja (iz h1) │
|
|
76
|
+
│ fullAddress │ Naslov (ulica, kraj, pošta) │
|
|
77
|
+
│ phone │ Telefon (iz tel: povezave) │
|
|
78
|
+
│ website │ Spletna stran │
|
|
79
|
+
│ email │ E-pošta │
|
|
80
|
+
│ taxNumber │ Davčna številka │
|
|
81
|
+
│ registrationNumber │ Matična številka │
|
|
82
|
+
│ vatPayer │ Ali je zavezanec za DDV │
|
|
83
|
+
│ vatNumber │ DDV številka (SI...) │
|
|
84
|
+
│ skis │ SKIS šifra │
|
|
85
|
+
│ dateOfEntry │ Datum vpisa │
|
|
86
|
+
│ activity │ Dejavnost (TSmedia) │
|
|
87
|
+
│ keyPeople │ Nadzorniki, ustanovitelji, zastopniki (OCR) │
|
|
88
|
+
│ bankAccounts │ Seznam TRR-jev (IBAN, status, banka) │
|
|
89
|
+
│ financingFromBudget │ Zneski financiranja iz proračuna po letih │
|
|
90
|
+
└─────────────────────┴─────────────────────────────────────────────────┘
|
|
91
|
+
|
|
92
|
+
FinancialData:
|
|
93
|
+
┌─────────────────────┬─────────────────────────────────────────────────┐
|
|
94
|
+
│ year │ Leto │
|
|
95
|
+
│ netSalesRevenue │ Čisti prihodki od prodaje (ali null) │
|
|
96
|
+
│ operatingResult │ Poslovni izid (ali null) │
|
|
97
|
+
└─────────────────────┴─────────────────────────────────────────────────┘
|
|
98
|
+
|
|
99
|
+
SearchResult:
|
|
100
|
+
┌─────────────────────┬─────────────────────────────────────────────────┐
|
|
101
|
+
│ name │ Naziv podjetja │
|
|
102
|
+
│ url │ Relativni URL (npr. /ETVETERA-D-O-O/) │
|
|
103
|
+
│ address │ Naslov │
|
|
104
|
+
│ city │ Pošta + kraj │
|
|
105
|
+
│ registrationNumber │ Matična številka │
|
|
106
|
+
│ taxNumber │ Davčna številka │
|
|
107
|
+
│ activity │ Dejavnost │
|
|
108
|
+
│ employees │ Št. zaposlenih (povprečje, npr. 3.5) │
|
|
109
|
+
└─────────────────────┴─────────────────────────────────────────────────┘
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
5. POMEMBNO
|
|
113
|
+
==============================================================================
|
|
114
|
+
|
|
115
|
+
• Prijava je potrebna samo za finančne podatke. Vsi ostali podatki
|
|
116
|
+
(kontakt, TRR, ključne osebe, SKIS) so javno dostopni.
|
|
117
|
+
|
|
118
|
+
• bizi.si ima rate-limiter za prijave. Ob preveč poskusih vrne
|
|
119
|
+
"Presegli ste dovoljeno število prijav." Počakajte nekaj časa.
|
|
120
|
+
|
|
121
|
+
• Imena ključnih oseb so na bizi.si zaščitena kot slike
|
|
122
|
+
(ImageGenerator.aspx). Knjižnica uporablja tesseract.js OCR za
|
|
123
|
+
pretvorbo slik v besedilo. Statusna sporočila ("Nadzornik ni vpisan.")
|
|
124
|
+
so filtrirana in vrnejo prazen seznam.
|
|
125
|
+
|
|
126
|
+
• Število zaposlenih: "3 ali 4 zaposleni" → 3.5 (povprečje).
|
|
127
|
+
|
|
128
|
+
• Knjižnica nima zunanjih odvisnosti razen tesseract.js za OCR.
|
|
129
|
+
Uporablja izključno Bun-ov vgrajeni fetch (HTTP).
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
6. STRUKTURA PROJEKTA
|
|
133
|
+
==============================================================================
|
|
134
|
+
|
|
135
|
+
src/
|
|
136
|
+
index.ts — Vstopna točka, exporti
|
|
137
|
+
bizisi.ts — Glavni BizisiScraper razred
|
|
138
|
+
types.ts — TypeScript tipi
|
|
139
|
+
|
|
140
|
+
cli.ts — Orodje za ukazno vrstico
|
|
141
|
+
package.json — Konfiguracija projekta
|
|
142
|
+
README.txt — Ta dokument
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
7. LICENCA
|
|
146
|
+
==============================================================================
|
|
147
|
+
|
|
148
|
+
MIT
|
package/cli.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* CLI tool za bizi.si — čisti Bun fetch, brez brskalnika.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* bun run cli.ts "query" # default: brez prijave
|
|
7
|
+
* bun run cli.ts "query" --login # s prijavo (za finančne podatke)
|
|
8
|
+
* bun run cli.ts "query" --max 2 # omeji zadetke
|
|
9
|
+
*
|
|
10
|
+
* Login zahteva BIZISI_USERNAME in BIZISI_PASSWORD v okolju.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { BizisiScraper } from "./src/index";
|
|
14
|
+
|
|
15
|
+
// ---------------------------------------------------------------------------
// CLI entry point: parse arguments, optionally log in, scrape, print JSON.
//
// Progress and diagnostics go to stderr (console.error) so that stdout carries
// only the JSON payload and can be piped into other tools.
// ---------------------------------------------------------------------------

// The query is the first positional argument. If it is missing, or if a flag
// was passed in its place (e.g. `cli.ts --login foo`), show the usage line
// instead of searching for the flag text.
const query = process.argv[2];
if (!query || query.startsWith("--")) {
  console.error("Usage: bun run cli.ts <query> [--login] [--max <N>]");
  process.exit(1);
}

const doLogin = process.argv.includes("--login");

// `--max <N>`: a missing, non-numeric or non-positive value is normalised to 0.
// NOTE(review): 0 is presumably treated as "no limit" by scrape() — the
// implementation is not visible from this file; confirm in src/bizisi.ts.
const maxIdx = process.argv.indexOf("--max");
const parsedMax = maxIdx > 0 ? parseInt(process.argv[maxIdx + 1] ?? "0", 10) : 0;
const maxResults = Number.isFinite(parsedMax) && parsedMax > 0 ? parsedMax : 0;

const scraper = new BizisiScraper();

try {
  if (doLogin) {
    console.error("🔑 Logging in to bizi.si …");
    // No arguments: credentials come from BIZISI_USERNAME / BIZISI_PASSWORD.
    await scraper.login();
  }

  console.error(`🔍 Searching & scraping "${query}" …`);

  const scraped = await scraper.scrape(query, {
    maxResults,
    // Financial data is only requested together with --login.
    includeFinancialData: doLogin,
  });

  // Human-readable summary on stderr.
  console.error(`📋 Got ${scraped.length} result(s)`);
  for (const s of scraped) {
    console.error(` - ${s.company.name} (${s.company.registrationNumber})`);
    if (s.company.phone) console.error(` 📞 ${s.company.phone}`);
    if (s.company.email) console.error(` 📧 ${s.company.email}`);
    if (s.company.website) console.error(` 🌐 ${s.company.website}`);
    if (s.company.bankAccounts.length > 0)
      console.error(` 🏦 ${s.company.bankAccounts.length} TRR(s)`);
    if (s.financialData.length > 0)
      console.error(` 📊 ${s.financialData.length} years of financial data`);
  }

  // Machine-readable result on stdout.
  console.log(JSON.stringify(scraped, null, 2));
} catch (err) {
  console.error("❌ Error:", err);
  // Set the exit code instead of calling process.exit(): exit() terminates
  // the process immediately, which would skip the `finally` block below and
  // leave scraper.close() un-awaited.
  process.exitCode = 1;
} finally {
  await scraper.close();
}
|
package/dist/bizisi.d.ts
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* bizi.si scraper — čisti Bun fetch, brez brskalnika.
|
|
3
|
+
*
|
|
4
|
+
* bizi.si uporablja ASP.NET WebForms, zato je treba:
|
|
5
|
+
* 1. Shranjevati piškotke (ASP.NET_SessionId, .ASPXAUTH)
|
|
6
|
+
* 2. Prenašati __VIEWSTATE in __EVENTVALIDATION med GET in POST
|
|
7
|
+
*
|
|
8
|
+
* Odvisnosti: tesseract.js (OCR imen oseb) — sicer samo Bun built-in API-ji.
|
|
9
|
+
*/
|
|
10
|
+
import type { CompanySearchResult, CompanyDetail, FinancialData, ScrapeResult } from "./types.js";
|
|
11
|
+
/**
 * Scraper client for bizi.si that works over plain HTTP fetch, without a
 * browser. One instance holds its own session state and OCR worker; call
 * {@link BizisiScraper.close} when finished with it.
 */
export declare class BizisiScraper {
|
|
12
|
+
#private;
|
|
13
|
+
// NOTE(review): presumably the cookie jar for the ASP.NET session cookies
// mentioned in the file header — confirm in src/bizisi.ts.
private jar;
|
|
14
|
+
// NOTE(review): presumably the backing field of the `loggedIn` getter — confirm.
private _loggedIn;
|
|
15
|
+
// OCR worker handle; released by close().
private ocrWorker;
|
|
16
|
+
/** Cleans up the OCR worker (call this when you are done). */
|
|
17
|
+
close(): Promise<void>;
|
|
18
|
+
/** Login state of this instance (presumably true after a successful login() — confirm). */
get loggedIn(): boolean;
|
|
19
|
+
/**
 * Logs in to bizi.si. Per the package README, calling it without arguments
 * reads the credentials from the BIZISI_USERNAME / BIZISI_PASSWORD
 * environment variables, and login is only needed for financial data.
 */
login(username?: string, password?: string): Promise<void>;
|
|
20
|
+
/**
 * Searches for companies without fetching their detail pages. The README
 * shows queries by name, registration number and tax number.
 */
search(query: string): Promise<CompanySearchResult[]>;
|
|
21
|
+
/**
 * Fetches the detail record of one company.
 * @param relativeUrl Site-relative company URL, e.g. "/ETVETERA-D-O-O/".
 */
getCompanyDetail(relativeUrl: string): Promise<CompanyDetail>;
|
|
22
|
+
/**
 * Fetches the financial data of one company. Per the README, financial data
 * requires a prior login.
 * @param relativeUrl Site-relative company URL, e.g. "/ETVETERA-D-O-O/".
 */
getFinancialData(relativeUrl: string): Promise<FinancialData[]>;
|
|
23
|
+
/**
 * Scrapes a single company into one ScrapeResult.
 * NOTE(review): presumably combines getCompanyDetail() and
 * getFinancialData() — confirm in src/bizisi.ts.
 * @param relativeUrl Site-relative company URL.
 */
scrapeCompany(relativeUrl: string): Promise<ScrapeResult>;
|
|
24
|
+
/**
 * Searches for `query` and scrapes the matching companies.
 * @param query Search text (company name, registration or tax number).
 * @param options Optional limits and switches, see the fields below.
 */
scrape(query: string, options?: {
|
|
25
|
+
/** Upper bound on the number of search hits to scrape (the CLI passes 0 when no limit is given). */
maxResults?: number;
|
|
26
|
+
/** Whether to also fetch financial data (the CLI enables this only together with --login). */
includeFinancialData?: boolean;
|
|
27
|
+
}): Promise<ScrapeResult[]>;
|
|
28
|
+
}
|
|
29
|
+
//# sourceMappingURL=bizisi.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bizisi.d.ts","sourceRoot":"","sources":["../src/bizisi.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,OAAO,KAAK,EACV,mBAAmB,EACnB,aAAa,EACb,aAAa,EAGb,YAAY,EACb,MAAM,YAAY,CAAC;AAgOpB,qBAAa,aAAa;;IACxB,OAAO,CAAC,GAAG,CAAmB;IAC9B,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAuB;IAYxC,8CAA8C;IACxC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB5B,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAMK,KAAK,CAAC,QAAQ,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAoD1D,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,mBAAmB,EAAE,CAAC;IA4DrD,gBAAgB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,CAAC;IAyI7D,gBAAgB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,aAAa,EAAE,CAAC;IA6C/D,aAAa,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAMzD,MAAM,CACV,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE;QAAE,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,oBAAoB,CAAC,EAAE,OAAO,CAAA;KAAE,GAChE,OAAO,CAAC,YAAY,EAAE,CAAC;CAiI3B"}
|