scraper2-hj3415: scraper2_hj3415-2.4.1-py3-none-any.whl → scraper2_hj3415-2.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. scraper2_hj3415/app/adapters/out/playwright/browser.py +26 -0
  2. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/browser_factory.py +7 -7
  3. scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py +18 -0
  4. scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py +19 -0
  5. scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py +37 -0
  6. scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py +24 -0
  7. scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py +84 -0
  8. scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py +90 -0
  9. scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py +25 -0
  10. scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py +96 -0
  11. {scraper2 → scraper2_hj3415/app}/adapters/out/playwright/session.py +1 -1
  12. scraper2_hj3415/app/adapters/out/sinks/memory_sink.py +25 -0
  13. scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py +63 -0
  14. {scraper2/adapters/out/sinks/memory → scraper2_hj3415/app/adapters/out/sinks}/store.py +14 -5
  15. scraper2_hj3415/app/adapters/site/wisereport_playwright.py +379 -0
  16. scraper2_hj3415/app/composition.py +225 -0
  17. scraper2_hj3415/app/domain/blocks.py +61 -0
  18. scraper2_hj3415/app/domain/constants.py +33 -0
  19. scraper2_hj3415/app/domain/doc.py +16 -0
  20. scraper2_hj3415/app/domain/endpoint.py +11 -0
  21. scraper2_hj3415/app/domain/series.py +11 -0
  22. scraper2_hj3415/app/domain/types.py +19 -0
  23. scraper2_hj3415/app/parsing/_normalize/label.py +92 -0
  24. scraper2_hj3415/app/parsing/_normalize/table.py +53 -0
  25. scraper2_hj3415/app/parsing/_normalize/text.py +31 -0
  26. scraper2_hj3415/app/parsing/_normalize/values.py +70 -0
  27. scraper2_hj3415/app/parsing/_tables/html_table.py +89 -0
  28. scraper2_hj3415/app/parsing/c101/__init__.py +0 -0
  29. scraper2_hj3415/app/parsing/c101/_sise_normalizer.py +103 -0
  30. scraper2_hj3415/app/parsing/c101/company_overview.py +47 -0
  31. scraper2_hj3415/app/parsing/c101/earning_surprise.py +217 -0
  32. scraper2_hj3415/app/parsing/c101/fundamentals.py +95 -0
  33. scraper2_hj3415/app/parsing/c101/major_shareholders.py +57 -0
  34. scraper2_hj3415/app/parsing/c101/sise.py +47 -0
  35. scraper2_hj3415/app/parsing/c101/summary_cmp.py +87 -0
  36. scraper2_hj3415/app/parsing/c101/yearly_consensus.py +197 -0
  37. scraper2_hj3415/app/parsing/c101_parser.py +45 -0
  38. scraper2_hj3415/app/parsing/c103_parser.py +22 -0
  39. scraper2_hj3415/app/parsing/c104_parser.py +26 -0
  40. scraper2_hj3415/app/parsing/c106_parser.py +137 -0
  41. scraper2_hj3415/app/parsing/c108_parser.py +254 -0
  42. scraper2_hj3415/app/ports/__init__.py +0 -0
  43. scraper2_hj3415/app/ports/browser/__init__.py +0 -0
  44. scraper2_hj3415/app/ports/browser/browser_factory_port.py +9 -0
  45. scraper2_hj3415/app/ports/browser/browser_port.py +32 -0
  46. scraper2_hj3415/app/ports/browser/capabilities/__init__.py +15 -0
  47. scraper2_hj3415/app/ports/browser/capabilities/interaction.py +27 -0
  48. scraper2_hj3415/app/ports/browser/capabilities/navigation.py +18 -0
  49. scraper2_hj3415/app/ports/browser/capabilities/scope.py +66 -0
  50. scraper2_hj3415/app/ports/browser/capabilities/table.py +28 -0
  51. scraper2_hj3415/app/ports/browser/capabilities/text.py +16 -0
  52. scraper2_hj3415/app/ports/browser/capabilities/wait.py +51 -0
  53. scraper2_hj3415/app/ports/ingest/__init__.py +0 -0
  54. scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py +28 -0
  55. scraper2_hj3415/app/ports/sinks/__init__.py +0 -0
  56. scraper2_hj3415/app/ports/sinks/nfs_sink_port.py +20 -0
  57. scraper2_hj3415/app/ports/site/__init__.py +0 -0
  58. scraper2_hj3415/app/ports/site/wisereport_port.py +30 -0
  59. scraper2_hj3415/app/services/__init__.py +0 -0
  60. scraper2_hj3415/app/services/fetch/__init__.py +0 -0
  61. scraper2_hj3415/app/services/fetch/fetch_c101.py +59 -0
  62. scraper2_hj3415/app/services/fetch/fetch_c103.py +121 -0
  63. scraper2_hj3415/app/services/fetch/fetch_c104.py +160 -0
  64. scraper2_hj3415/app/services/fetch/fetch_c106.py +90 -0
  65. scraper2_hj3415/app/services/fetch/fetch_c108.py +59 -0
  66. scraper2_hj3415/app/services/nfs_doc_builders.py +304 -0
  67. scraper2_hj3415/app/usecases/__init__.py +0 -0
  68. scraper2_hj3415/app/usecases/ingest/__init__.py +0 -0
  69. scraper2_hj3415/app/usecases/ingest/ingest_c101.py +111 -0
  70. scraper2_hj3415/app/usecases/ingest/ingest_c103.py +162 -0
  71. scraper2_hj3415/app/usecases/ingest/ingest_c104.py +182 -0
  72. scraper2_hj3415/app/usecases/ingest/ingest_c106.py +136 -0
  73. scraper2_hj3415/app/usecases/ingest/ingest_c108.py +122 -0
  74. scraper2/main.py → scraper2_hj3415/cli.py +45 -72
  75. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/METADATA +3 -1
  76. scraper2_hj3415-2.7.0.dist-info/RECORD +93 -0
  77. scraper2_hj3415-2.7.0.dist-info/entry_points.txt +3 -0
  78. scraper2/adapters/out/playwright/browser.py +0 -102
  79. scraper2/adapters/out/sinks/memory/__init__.py +0 -15
  80. scraper2/adapters/out/sinks/memory/c101_memory_sink.py +0 -26
  81. scraper2/adapters/out/sinks/memory/c103_memory_sink.py +0 -26
  82. scraper2/adapters/out/sinks/memory/c104_memory_sink.py +0 -26
  83. scraper2/adapters/out/sinks/memory/c106_memory_sink.py +0 -26
  84. scraper2/adapters/out/sinks/memory/c108_memory_sink.py +0 -26
  85. scraper2/adapters/out/sinks/mongo/__init__.py +0 -14
  86. scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py +0 -43
  87. scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py +0 -41
  88. scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py +0 -41
  89. scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py +0 -41
  90. scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py +0 -41
  91. scraper2/app/composition.py +0 -204
  92. scraper2/app/parsing/_converters.py +0 -85
  93. scraper2/app/parsing/_normalize.py +0 -134
  94. scraper2/app/parsing/c101_parser.py +0 -143
  95. scraper2/app/parsing/c103_parser.py +0 -128
  96. scraper2/app/parsing/c104_parser.py +0 -143
  97. scraper2/app/parsing/c106_parser.py +0 -153
  98. scraper2/app/parsing/c108_parser.py +0 -65
  99. scraper2/app/ports/browser/browser_factory_port.py +0 -11
  100. scraper2/app/ports/browser/browser_port.py +0 -22
  101. scraper2/app/ports/ingest_port.py +0 -14
  102. scraper2/app/ports/sinks/base_sink_port.py +0 -14
  103. scraper2/app/ports/sinks/c101_sink_port.py +0 -9
  104. scraper2/app/ports/sinks/c103_sink_port.py +0 -9
  105. scraper2/app/ports/sinks/c104_sink_port.py +0 -9
  106. scraper2/app/ports/sinks/c106_sink_port.py +0 -9
  107. scraper2/app/ports/sinks/c108_sink_port.py +0 -9
  108. scraper2/app/usecases/fetch/fetch_c101.py +0 -43
  109. scraper2/app/usecases/fetch/fetch_c103.py +0 -103
  110. scraper2/app/usecases/fetch/fetch_c104.py +0 -76
  111. scraper2/app/usecases/fetch/fetch_c106.py +0 -90
  112. scraper2/app/usecases/fetch/fetch_c108.py +0 -49
  113. scraper2/app/usecases/ingest/ingest_c101.py +0 -36
  114. scraper2/app/usecases/ingest/ingest_c103.py +0 -37
  115. scraper2/app/usecases/ingest/ingest_c104.py +0 -37
  116. scraper2/app/usecases/ingest/ingest_c106.py +0 -38
  117. scraper2/app/usecases/ingest/ingest_c108.py +0 -39
  118. scraper2_hj3415-2.4.1.dist-info/RECORD +0 -63
  119. scraper2_hj3415-2.4.1.dist-info/entry_points.txt +0 -3
  120. {scraper2 → scraper2_hj3415}/.DS_Store +0 -0
  121. {scraper2 → scraper2_hj3415}/__init__.py +0 -0
  122. {scraper2/adapters/out → scraper2_hj3415/app}/__init__.py +0 -0
  123. {scraper2/adapters/out/playwright → scraper2_hj3415/app/adapters}/__init__.py +0 -0
  124. {scraper2 → scraper2_hj3415/app}/adapters/out/.DS_Store +0 -0
  125. {scraper2/app → scraper2_hj3415/app/adapters/out}/__init__.py +0 -0
  126. {scraper2/app/parsing → scraper2_hj3415/app/adapters/out/playwright}/__init__.py +0 -0
  127. {scraper2 → scraper2_hj3415/app}/adapters/out/sinks/.DS_Store +0 -0
  128. {scraper2/app/ports → scraper2_hj3415/app/adapters/out/sinks}/__init__.py +0 -0
  129. {scraper2/app/ports/browser → scraper2_hj3415/app/adapters/site}/__init__.py +0 -0
  130. {scraper2/app/ports/sinks → scraper2_hj3415/app/domain}/__init__.py +0 -0
  131. {scraper2/app/usecases → scraper2_hj3415/app/parsing}/__init__.py +0 -0
  132. {scraper2/app/usecases/fetch → scraper2_hj3415/app/parsing/_normalize}/__init__.py +0 -0
  133. {scraper2/app/usecases/ingest → scraper2_hj3415/app/parsing/_tables}/__init__.py +0 -0
  134. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/WHEEL +0 -0
  135. {scraper2_hj3415-2.4.1.dist-info → scraper2_hj3415-2.7.0.dist-info}/licenses/LICENSE +0 -0
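Reading the file list as a whole: the top-level package moves from `scraper2` to `scraper2_hj3415` (matching the distribution name), `main.py` becomes `cli.py`, the monolithic Playwright browser adapter is split into per-concern capability modules (interaction, navigation, scope, table, text, wait) mirrored on both the ports and adapters sides, the five per-endpoint sink ports and sinks collapse into single `nfs_sink_port` / `memory_sink` / `mongo_sink` modules, and new `domain`, `services/fetch`, and `site` (WiseReport) layers appear. The contents of the new capability files are not part of this diff; the sketch below is only a plausible shape for a capability-composed browser port, assuming Protocol-based ports that match the file layout above.

```python
# Hypothetical sketch: the real 2.7.0 port definitions are not shown in this
# diff. It assumes narrow Protocol capabilities composed into one BrowserPort.
from typing import Any, Protocol


class NavigationCapability(Protocol):
    async def goto(self, url: str, timeout_ms: int = 10_000) -> None: ...


class TextCapability(Protocol):
    async def text(self, selector: str) -> str: ...
    async def texts(self, selector: str) -> list[str]: ...


class TableCapability(Protocol):
    async def table_records(
        self, table_selector: str, *, header: int | list[int] = 0
    ) -> list[dict[str, Any]]: ...


class BrowserPort(NavigationCapability, TextCapability, TableCapability, Protocol):
    """One port composed from narrow capabilities; each adapter implements a
    capability in its own module instead of one 100-line class."""
```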
--- /dev/null
+++ scraper2_hj3415-2.7.0.dist-info/RECORD
@@ -0,0 +1,93 @@
+ scraper2_hj3415/.DS_Store,sha256=sEtRhy6uiX4PgYuHnRIsUN_QtI0jR0lSLi4kYurHsso,6148
+ scraper2_hj3415/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/cli.py,sha256=tGQWZAkE2qzZiKbD0G1d2Tiw28dWMjYL_QPAIU5C_GI,8066
+ scraper2_hj3415/app/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/composition.py,sha256=t9NnNDLL6-VT28dT_kBtc_4Sd6qdR_cGKcY2_wmQHMI,6573
+ scraper2_hj3415/app/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/adapters/out/.DS_Store,sha256=nUqwRB5F2DM82P8BALYvDI0YoD1UbmngfSi8ukKkY7E,6148
+ scraper2_hj3415/app/adapters/out/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/adapters/out/playwright/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/adapters/out/playwright/browser.py,sha256=XBo3a3sZjLwoJpDMcsr-I2iwRtPON9M9-6JkMdyPNi8,563
+ scraper2_hj3415/app/adapters/out/playwright/browser_factory.py,sha256=IAyJeYXkREtghsus411ganBdhQ-tjXoZm7lCG68-yB8,3781
+ scraper2_hj3415/app/adapters/out/playwright/session.py,sha256=GLlpO0rLQwXnxX1GyaRxy7P2UsYrceNczfhUvx3pepE,3734
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/__init__.py,sha256=SxNeQhM-jdV1fCdTqXDekq8kTU5yoR3FMy-U5HKRjd0,518
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/_base.py,sha256=5wgrv7A3lijVFTOCul--CdkNrgmqO3-2ji0Z_q-Bum0,726
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/interaction.py,sha256=Bf73QhxglQkCYpJAds3VyeBva-X3Fli25GISjk13E64,1098
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/navigation.py,sha256=waHZ75_6zC1gHb1SB0bir4wPuaDuKFB3jzO1WKKEQP0,847
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/scope.py,sha256=tRz-pxSJUnlXAdb5ooSk7n5hXgC92fSAfxYprIMhqLY,2455
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/table.py,sha256=PzAsMZGzKXjGuY-2Rzm8xquOnYIwFO8FC3v20u6ya7U,2976
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/text.py,sha256=6i_jirRBpGeyJ70qb-q7L8_bM7Tj8OdAxQiAVXVYSAw,1020
+ scraper2_hj3415/app/adapters/out/playwright/capabilities/wait.py,sha256=2x_SQlLQQkyojrs0bTtF9UbcdxPuqucFOeVrNGunMWk,3019
+ scraper2_hj3415/app/adapters/out/sinks/.DS_Store,sha256=c6VOGBl6bMmFzab335dcT09WinGd4BCZXZiPjrZjd7o,6148
+ scraper2_hj3415/app/adapters/out/sinks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/adapters/out/sinks/memory_sink.py,sha256=2ietFp4XhjktJWwyUc11QCse-9xDWIMSm4UVh8XvRuk,719
+ scraper2_hj3415/app/adapters/out/sinks/mongo_sink.py,sha256=sS_q7pV5jQgudXjji9qTcKph3foSsrcILk-YDCdxuiM,1682
+ scraper2_hj3415/app/adapters/out/sinks/store.py,sha256=yerl6NvaacVHDGnQ1Obc31aQFxxXUCWfGq2DKKV8aTc,2826
+ scraper2_hj3415/app/adapters/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/adapters/site/wisereport_playwright.py,sha256=nhZj_P6GFipS7hLxVD3KENWu4pEnGWyigAs00-0WB7Y,15010
+ scraper2_hj3415/app/domain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/domain/blocks.py,sha256=ddrGYo12hRI3h3mLsGzrM9RUZuEabqdfBWsx5l8jVGs,1871
+ scraper2_hj3415/app/domain/constants.py,sha256=QXDmubt47wLV_qtXfj3no8xxm2os65GY--ZbsQJAJFo,1179
+ scraper2_hj3415/app/domain/doc.py,sha256=G9Ik6f-HiNZjiT26Obtrx6HTyYsTbMg1vKaBQ536hOI,483
+ scraper2_hj3415/app/domain/endpoint.py,sha256=8nwV0ybBYDKS8ULngNvc_dh-Pl4BqpMJrdKHDuThSTA,222
+ scraper2_hj3415/app/domain/series.py,sha256=KZqHqavPkL8p14F58wuUrOS-N9k0pKfKBdhDL6kTYU4,300
+ scraper2_hj3415/app/domain/types.py,sha256=_fPII4xFc_zTWuVm-V_SdaB092XR2OeS0sNdJVwE5t8,374
+ scraper2_hj3415/app/parsing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/parsing/c101_parser.py,sha256=fA7-EUlG3GhLlX8l7m9p-Bn5O2WQY0H9h7IAIDvlmGQ,2049
+ scraper2_hj3415/app/parsing/c103_parser.py,sha256=NmJbNmWqASHPA9or_raf-DM8bvwGaC1UxA8LOINkOu0,783
+ scraper2_hj3415/app/parsing/c104_parser.py,sha256=V5cQXobEHuy0ScaqziWDM7H_7AH9ZgJcB0m7olnawUM,827
+ scraper2_hj3415/app/parsing/c106_parser.py,sha256=DvXv_OndsWed4LOyv8D9bsCxbj8_6rYrfR6ICR-VBnM,4346
+ scraper2_hj3415/app/parsing/c108_parser.py,sha256=Kopf3CAV4W66YR6at7isoNV-C8A7-eCOQcqPs85FgEE,7572
+ scraper2_hj3415/app/parsing/_normalize/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/parsing/_normalize/label.py,sha256=yUtUalOlXuckzsQ7RXqdQ6F4Q2UmVBGr-zoEpZ6ryX0,2752
+ scraper2_hj3415/app/parsing/_normalize/table.py,sha256=V6I79gOHeRsyFiuIXyEy0Whg-pxZeDdTaGh0cdR5mrE,1464
+ scraper2_hj3415/app/parsing/_normalize/text.py,sha256=BnBZyaQiuydsQVUSDgIEn8JYYbxrM-3BZTmvNqiFK3g,683
+ scraper2_hj3415/app/parsing/_normalize/values.py,sha256=X5H7xprg5y8pkXilXCg_br7UIPjErcLHGDkOrxjctbk,1824
+ scraper2_hj3415/app/parsing/_tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/parsing/_tables/html_table.py,sha256=eNkapEkmv0MPKEOWvknJi4yjDSqbTy2pKO-FPdj9AN0,2513
+ scraper2_hj3415/app/parsing/c101/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/parsing/c101/_sise_normalizer.py,sha256=0wG9AZ6MYvWMf_UA9QK3_TbyDctdi3ldyKNKwmLJJic,3150
+ scraper2_hj3415/app/parsing/c101/company_overview.py,sha256=R6K44Rlw9Iv740SR2XZDxBkZxsLi_SNB6n3xruO3xqk,1391
+ scraper2_hj3415/app/parsing/c101/earning_surprise.py,sha256=QqiVVrdJuQXm25mvm0AsQB7gi461IiUbAoD8iCamUjg,7028
+ scraper2_hj3415/app/parsing/c101/fundamentals.py,sha256=3Fy6ya53NF-VoXa16GDpqUTdoFo2PIEjt5rjlXNa8sI,2875
+ scraper2_hj3415/app/parsing/c101/major_shareholders.py,sha256=sF1j1VNZSoIkQYHmuhMWSx52l00WDf6El2NkiRoXW0o,2047
+ scraper2_hj3415/app/parsing/c101/sise.py,sha256=Mky6pLWZ_LZkeUMHIPcZfrv0icTNxWEc7uTYKU2uJ0M,1314
+ scraper2_hj3415/app/parsing/c101/summary_cmp.py,sha256=hhBCtH7hgAKFUh4gr7J-mz-6c9NLT9KZODFY8LTG-Fc,2776
+ scraper2_hj3415/app/parsing/c101/yearly_consensus.py,sha256=FCLA-pYCMbQffNYOV6YbZ8GnPJjyZHmCSIKdw9-EPuI,5572
+ scraper2_hj3415/app/ports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/ports/browser/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/ports/browser/browser_factory_port.py,sha256=exG5XM3yen84lqsY0ggpZ58kcyCyoaMusDMooVxCGG0,353
+ scraper2_hj3415/app/ports/browser/browser_port.py,sha256=msMxtFQAVw58j-J-QAuY83gwrmdODsT9Iq6hHOJzLVM,742
+ scraper2_hj3415/app/ports/browser/capabilities/__init__.py,sha256=xnLRb_0DWKnx6hOIlypwzHN-BFFejQX9w35_HC1OmU4,401
+ scraper2_hj3415/app/ports/browser/capabilities/interaction.py,sha256=XiDewLWxp2M19WqYe-6ozismpjTm_gVmHdmvsr0HqLk,588
+ scraper2_hj3415/app/ports/browser/capabilities/navigation.py,sha256=IOBFV8PkdPMKVJUG7c9FfKPNkuSlTeKXDkVvCsNhxbg,424
+ scraper2_hj3415/app/ports/browser/capabilities/scope.py,sha256=9CFaCoKTMU_scUnkcgC6QU-Lke7JmfWP0-VP4mWalQA,1675
+ scraper2_hj3415/app/ports/browser/capabilities/table.py,sha256=9ubgKZtRvm_hktiyNdDgok1-VItGgjmwisFk4ZaLbbM,588
+ scraper2_hj3415/app/ports/browser/capabilities/text.py,sha256=c1ZlZttSsreeUFJcSZMUKExFTB-gWAsLcYK15dxCc9Y,516
+ scraper2_hj3415/app/ports/browser/capabilities/wait.py,sha256=K5hhm8SJauDEsKBu3fJDc9ZV19_-6YlHVvGallMuzKQ,1132
+ scraper2_hj3415/app/ports/ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/ports/ingest/nfs_ingest_port.py,sha256=Ia8GByRLV9SPEU89I8A4V6tmUwsO8f7xM7-yVxnsA0o,658
+ scraper2_hj3415/app/ports/sinks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/ports/sinks/nfs_sink_port.py,sha256=Y0Op18MVUZ1kIZ2oQr9GOtskMGZq1DC0tGCBjBcbeHA,519
+ scraper2_hj3415/app/ports/site/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/ports/site/wisereport_port.py,sha256=dx2tgWLW3vJbXZTFd-eLNfsYbcWe7xoikYQb3IEXVyE,858
+ scraper2_hj3415/app/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/services/nfs_doc_builders.py,sha256=vxZgNKEIRroYsEnD6PfLt9pB4w12JU5g-E7aRmY_raY,8924
+ scraper2_hj3415/app/services/fetch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/services/fetch/fetch_c101.py,sha256=nl96RKvahNS9YtusE4e9AFMb0Wf7UHaW2X6qAAzYuCY,2228
+ scraper2_hj3415/app/services/fetch/fetch_c103.py,sha256=Jv4HByrJodVFwsMOlNPDlHaOQ8B_-Oeq7F4UWT0gxnA,4544
+ scraper2_hj3415/app/services/fetch/fetch_c104.py,sha256=qQCeg1Gwbai6jzbhoKzdoRctG7hQyCHrnK6H6n6kd3Q,5883
+ scraper2_hj3415/app/services/fetch/fetch_c106.py,sha256=UIAMaQB-FHXsvQ0ONI3fTO22M3npXrHILxO_4ayY2Lk,3462
+ scraper2_hj3415/app/services/fetch/fetch_c108.py,sha256=o9GesH66jqCygZIEbrHlVwHGGK1_2Ilc2_1b24fSC54,2236
+ scraper2_hj3415/app/usecases/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/usecases/ingest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ scraper2_hj3415/app/usecases/ingest/ingest_c101.py,sha256=qa1sH9SpJENU24stCLepRpLTpbpR1zKZkNmVhjbE9Qs,3757
+ scraper2_hj3415/app/usecases/ingest/ingest_c103.py,sha256=xbAR9fZYRKxScqDaz_WqXiK2c2XPEw2YQS0tboshxu0,5402
+ scraper2_hj3415/app/usecases/ingest/ingest_c104.py,sha256=H9p9XB6qIvTedojmSLbhZ3hqiphjTt0UBjpmYtXpvTc,5956
+ scraper2_hj3415/app/usecases/ingest/ingest_c106.py,sha256=z7HJuJ0udtSVE0YJyX9_8aZOO7q1Q2inXQ1AFL8fgCU,4298
+ scraper2_hj3415/app/usecases/ingest/ingest_c108.py,sha256=f7KzoPr_0V9blkpVJa1NFD4Yo6jgvn06WTc2LbeChkQ,3814
+ scraper2_hj3415-2.7.0.dist-info/entry_points.txt,sha256=jNGmOvBmptIUr9_XUMQOH4s6jNKCU51jOhKd31gOe8c,52
+ scraper2_hj3415-2.7.0.dist-info/licenses/LICENSE,sha256=QBiVGQuKAESeCfQE344Ik2ex6g2zfYdu9WqrRWydxIs,1068
+ scraper2_hj3415-2.7.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+ scraper2_hj3415-2.7.0.dist-info/METADATA,sha256=lBa1FeQPKAwR6a_bHTke2SaczCmUVQQZL-TQl1nCmWA,3516
+ scraper2_hj3415-2.7.0.dist-info/RECORD,,
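Each RECORD row follows the wheel spec: `path,sha256=<digest>,size`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with `=` padding stripped. The repeated `47DEQpj8HBSa…` value is simply the digest of an empty file, which is why every empty `__init__.py` shares it, and the RECORD entry for itself carries empty hash and size fields by convention. An entry can be reproduced like so:

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    """Build a wheel RECORD row (path,sha256=...,size) for a file on disk."""
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"


# Sanity check: the digest of empty content matches the hash that every
# empty __init__.py in the listing above shares.
empty = base64.urlsafe_b64encode(hashlib.sha256(b"").digest()).rstrip(b"=")
assert empty == b"47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU"
```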
--- /dev/null
+++ scraper2_hj3415-2.7.0.dist-info/entry_points.txt
@@ -0,0 +1,3 @@
+ [console_scripts]
+ scraper2=scraper2_hj3415.cli:app
+
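The new entry_points.txt registers a single console script, so after installation the package is driven as `scraper2` from the command line, consistent with the `scraper2/main.py → scraper2_hj3415/cli.py` rename in the file list. The launcher pip generates is roughly equivalent to the following; whether `app` is a Typer/Click application or a plain callable is not visible in this diff:

```python
# Approximate behaviour of the generated `scraper2` launcher script.
import sys

from scraper2_hj3415.cli import app

if __name__ == "__main__":
    sys.exit(app())
```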
--- scraper2/adapters/out/playwright/browser.py
+++ /dev/null
@@ -1,102 +0,0 @@
- # src/scraper2/adapters/out/playwright/session.py
- from __future__ import annotations
- from typing import Any
- from io import StringIO
- import pandas as pd
- from playwright.async_api import Page
-
- class PlaywrightBrowser:
-     def __init__(self, page: Page):
-         self.page = page
-
-     async def goto(self, url: str, timeout_ms: int = 10_000) -> None:
-         await self.page.goto(url, timeout=timeout_ms)
-
-     async def title(self) -> str:
-         return await self.page.title()
-
-     async def current_url(self) -> str:
-         return self.page.url
-
-     async def wait(self, selector: str, timeout_ms: int = 10_000) -> None:
-         await self.page.wait_for_selector(selector, timeout=timeout_ms, state="attached")
-
-     async def text(self, selector: str) -> str:
-         await self.wait(selector)
-         return (await self.page.locator(selector).first.text_content()) or ""
-
-     async def texts(self, selector: str) -> list[str]:
-         await self.wait(selector)
-         loc = self.page.locator(selector)
-         items = await loc.all()
-         out: list[str] = []
-         for it in items:
-             out.append((await it.text_content()) or "")
-         return out
-
-     async def text_first_by_text(self, needle: str) -> str:
-         return (await self.page.get_by_text(needle).first.text_content()) or ""
-
-     async def inner_text(self, selector: str) -> str:
-         await self.wait(selector)
-         return await self.page.locator(selector).first.inner_text()
-
-     async def click(self, selector: str) -> None:
-         await self.wait(selector)
-         await self.page.locator(selector).click()
-
-     async def table_records(
-         self,
-         table_selector: str,
-         *,
-         header: int | list[int] = 0
-     ) -> list[dict[str, Any]]:
-         await self.wait(table_selector)
-
-         table = self.page.locator(table_selector).first
-         html = await table.evaluate("el => el.outerHTML")  # includes the <table> tag itself
-         #print(html)
-
-         try:
-             df = pd.read_html(StringIO(html), header=header)[0]
-             #print(df.head(3))
-         except Exception as e:
-             # Catch everything here (ImportError from missing lxml, ValueError, ...) and surface the cause
-             raise RuntimeError(f"pd.read_html failed: {type(e).__name__}: {e}") from e
-
-         if header == 0:
-             if "항목" in df.columns:
-                 df["항목"] = df["항목"].astype(str).str.replace("펼치기", "").str.strip()
-
-             df.columns = (
-                 df.columns.astype(str)
-                 .str.replace("연간컨센서스보기", "", regex=False)
-                 .str.replace("연간컨센서스닫기", "", regex=False)
-                 .str.replace("(IFRS연결)", "", regex=False)
-                 .str.replace("(IFRS별도)", "", regex=False)
-                 .str.replace("(GAAP개별)", "", regex=False)
-                 .str.replace("(YoY)", "", regex=False)
-                 .str.replace("(QoQ)", "", regex=False)
-                 .str.replace("(E)", "", regex=False)
-                 .str.replace(".", "", regex=False)
-                 .str.strip()
-             )
-
-         # Convert NaN -> None
-         records: list[dict[str, Any]] = df.where(pd.notnull(df), None).to_dict(orient="records")
-         return records
-
-     async def outer_html(self, selector: str) -> str:
-         loc = self.page.locator(selector).first
-         return await loc.evaluate("el => el.outerHTML")
-
-     async def all_texts(self, selector: str) -> list[str]:
-         # selector may be CSS or an "xpath=..." expression
-         loc = self.page.locator(selector)
-         return await loc.all_text_contents()
-
-     async def outer_html_nth(self, selector: str, index: int) -> str:
-         loc = self.page.locator(selector).nth(index)
-         # If index is out of range, Playwright raises its own error;
-         # it could be wrapped in a friendlier error here if needed.
-         return await loc.evaluate("el => el.outerHTML")
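This deleted class (note the stale `session.py` path comment on its first line) bundled navigation, waiting, text extraction, and pandas-based table parsing into one adapter; in 2.7.0 those concerns live in the separate capability modules listed above, and table normalization moves under `parsing/_normalize` and `parsing/_tables`. The pandas step is also the testable core: pulled out as a pure function it can be exercised against saved HTML with no browser in the loop. A sketch under that assumption (this is not the actual 2.7.0 `html_table.py`, whose source is not shown):

```python
from io import StringIO
from typing import Any

import pandas as pd

# WiseReport header fragments that the deleted method stripped inline.
_HEADER_NOISE = (
    "연간컨센서스보기", "연간컨센서스닫기", "(IFRS연결)", "(IFRS별도)",
    "(GAAP개별)", "(YoY)", "(QoQ)", "(E)",
)


def table_html_to_records(html: str, header: int | list[int] = 0) -> list[dict[str, Any]]:
    """Parse a <table> outerHTML string into records, mirroring the deleted
    table_records() logic but with no Playwright dependency."""
    df = pd.read_html(StringIO(html), header=header)[0]
    if header == 0:
        if "항목" in df.columns:
            df["항목"] = df["항목"].astype(str).str.replace("펼치기", "").str.strip()
        cols = df.columns.astype(str)
        for noise in _HEADER_NOISE:
            cols = cols.str.replace(noise, "", regex=False)
        df.columns = cols.str.replace(".", "", regex=False).str.strip()
    # NaN -> None, matching the original record shape
    return df.where(pd.notnull(df), None).to_dict(orient="records")
```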
--- scraper2/adapters/out/sinks/memory/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
- # scraper2/adapters/out/sinks/memory/__init__.py
- from .c101_memory_sink import MemoryC101Sink
- from .c103_memory_sink import MemoryC103Sink
- from .c104_memory_sink import MemoryC104Sink
- from .c106_memory_sink import MemoryC106Sink
- from .c108_memory_sink import MemoryC108Sink
-
- __all__ = [
-     "MemoryC101Sink",
-     "MemoryC103Sink",
-     "MemoryC104Sink",
-     "MemoryC106Sink",
-     "MemoryC108Sink",
- ]
-
--- scraper2/adapters/out/sinks/memory/c101_memory_sink.py
+++ /dev/null
@@ -1,26 +0,0 @@
- # scraper2/adapters/out/sinks/memory/c101_memory_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from contracts.nfs.c101 import C101DTO
- from scraper2.adapters.out.sinks.memory.store import InMemoryStore
- from scraper2.app.ports.sinks.c101_sink_port import C101SinkPort
-
- _ENDPOINT = "c101"
-
- class MemoryC101Sink(C101SinkPort):
-     def __init__(self, store: InMemoryStore[C101DTO]):
-         self._store = store
-
-     async def write(self, dto: C101DTO, *, asof: Optional[datetime] = None) -> None:
-         await self._store.put(_ENDPOINT, dto.코드, dto)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C101DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
--- scraper2/adapters/out/sinks/memory/c103_memory_sink.py
+++ /dev/null
@@ -1,26 +0,0 @@
- # scraper2/adapters/out/sinks/memory/c103_memory_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from contracts.nfs.c103 import C103DTO
- from scraper2.adapters.out.sinks.memory.store import InMemoryStore
- from scraper2.app.ports.sinks.c103_sink_port import C103SinkPort
-
- _ENDPOINT = "c103"
-
- class MemoryC103Sink(C103SinkPort):
-     def __init__(self, store: InMemoryStore[C103DTO]):
-         self._store = store
-
-     async def write(self, dto: C103DTO, *, asof: Optional[datetime] = None) -> None:
-         await self._store.put(_ENDPOINT, dto.코드, dto)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C103DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
--- scraper2/adapters/out/sinks/memory/c104_memory_sink.py
+++ /dev/null
@@ -1,26 +0,0 @@
- # scraper2/adapters/out/sinks/memory/c104_memory_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from contracts.nfs.c104 import C104DTO
- from scraper2.adapters.out.sinks.memory.store import InMemoryStore
- from scraper2.app.ports.sinks.c104_sink_port import C104SinkPort
-
- _ENDPOINT = "c104"
-
- class MemoryC104Sink(C104SinkPort):
-     def __init__(self, store: InMemoryStore[C104DTO]):
-         self._store = store
-
-     async def write(self, dto: C104DTO, *, asof: Optional[datetime] = None) -> None:
-         await self._store.put(_ENDPOINT, dto.코드, dto)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C104DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
--- scraper2/adapters/out/sinks/memory/c106_memory_sink.py
+++ /dev/null
@@ -1,26 +0,0 @@
- # scraper2/adapters/out/sinks/memory/c106_memory_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from contracts.nfs.c106 import C106DTO
- from scraper2.adapters.out.sinks.memory.store import InMemoryStore
- from scraper2.app.ports.sinks.c106_sink_port import C106SinkPort
-
- _ENDPOINT = "c106"
-
- class MemoryC106Sink(C106SinkPort):
-     def __init__(self, store: InMemoryStore[C106DTO]):
-         self._store = store
-
-     async def write(self, dto: C106DTO, *, asof: Optional[datetime] = None) -> None:
-         await self._store.put(_ENDPOINT, dto.코드, dto)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C106DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
--- scraper2/adapters/out/sinks/memory/c108_memory_sink.py
+++ /dev/null
@@ -1,26 +0,0 @@
- # scraper2/adapters/out/sinks/memory/c108_memory_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from contracts.nfs.c108 import C108DTO
- from scraper2.adapters.out.sinks.memory.store import InMemoryStore
- from scraper2.app.ports.sinks.c108_sink_port import C108SinkPort
-
- _ENDPOINT = "c108"
-
- class MemoryC108Sink(C108SinkPort):
-     def __init__(self, store: InMemoryStore[C108DTO]):
-         self._store = store
-
-     async def write(self, dto: C108DTO, *, asof: Optional[datetime] = None) -> None:
-         await self._store.put(_ENDPOINT, dto.코드, dto)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C108DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         await self._store.put_many(_ENDPOINT, ((d.코드, d) for d in dtos))
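The five memory sinks above differ only in DTO type and endpoint string, which is exactly the duplication 2.7.0's single `memory_sink.py` removes. Its source is not shown in this diff, but a generic form consistent with the deleted code would look like this (hypothetical sketch; the `store` argument stands in for the shared in-memory store now at `adapters/out/sinks/store.py`):

```python
from __future__ import annotations

from datetime import datetime
from typing import Generic, Iterable, Optional, Protocol, TypeVar


class HasCode(Protocol):
    코드: str  # every CxxxDTO keys its documents by this ticker-code field


DTO = TypeVar("DTO", bound=HasCode)


class MemorySink(Generic[DTO]):
    """Hypothetical consolidation of the five per-endpoint memory sinks:
    the endpoint becomes a constructor argument instead of a module constant."""

    def __init__(self, store, endpoint: str) -> None:
        self._store = store
        self._endpoint = endpoint

    async def write(self, dto: DTO, *, asof: Optional[datetime] = None) -> None:
        await self._store.put(self._endpoint, dto.코드, dto)

    async def write_many(
        self, dtos: Iterable[DTO], *, asof: Optional[datetime] = None
    ) -> None:
        await self._store.put_many(self._endpoint, ((d.코드, d) for d in dtos))
```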
--- scraper2/adapters/out/sinks/mongo/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
- # scraper2/adapters/out/sinks/mongo/__init__.py
- from .c101_mongo_sink import MongoC101Sink
- from .c103_mongo_sink import MongoC103Sink
- from .c104_mongo_sink import MongoC104Sink
- from .c106_mongo_sink import MongoC106Sink
- from .c108_mongo_sink import MongoC108Sink
-
- __all__ = [
-     "MongoC101Sink",
-     "MongoC103Sink",
-     "MongoC104Sink",
-     "MongoC106Sink",
-     "MongoC108Sink",
- ]
--- scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py
+++ /dev/null
@@ -1,43 +0,0 @@
- # scraper2/adapters/out/sinks/mongo/c101_mongo_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from pymongo.asynchronous.database import AsyncDatabase
-
- from contracts.nfs.c101 import C101DTO
- from scraper2.app.ports.sinks.c101_sink_port import C101SinkPort
-
- from db2.nfs import (
-     upsert_latest,
-     upsert_latest_many,
-     insert_snapshot,
-     insert_snapshots_many,
- )
-
- _ENDPOINT = "c101"
-
-
- class MongoC101Sink(C101SinkPort):
-     def __init__(self, db: AsyncDatabase):
-         self._db = db
-
-     async def write(self, dto: C101DTO, *, asof: Optional[datetime] = None) -> None:
-         # the latest state is upserted
-         await upsert_latest(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-         # the history is appended via insert
-         await insert_snapshot(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C101DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         dtos_list = list(dtos)
-         if not dtos_list:
-             return
-
-         await upsert_latest_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
-         await insert_snapshots_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
--- scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py
+++ /dev/null
@@ -1,41 +0,0 @@
- # scraper2/adapters/out/sinks/mongo/c103_mongo_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from pymongo.asynchronous.database import AsyncDatabase
-
- from contracts.nfs.c103 import C103DTO
- from scraper2.app.ports.sinks.c103_sink_port import C103SinkPort
-
- from db2.nfs import (
-     upsert_latest,
-     upsert_latest_many,
-     insert_snapshot,
-     insert_snapshots_many,
- )
-
- _ENDPOINT = "c103"
-
-
- class MongoC103Sink(C103SinkPort):
-     def __init__(self, db: AsyncDatabase):
-         self._db = db
-
-     async def write(self, dto: C103DTO, *, asof: Optional[datetime] = None) -> None:
-         await upsert_latest(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-         await insert_snapshot(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C103DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         dtos_list = list(dtos)
-         if not dtos_list:
-             return
-
-         await upsert_latest_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
-         await insert_snapshots_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
--- scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py
+++ /dev/null
@@ -1,41 +0,0 @@
- # scraper2/adapters/out/sinks/mongo/c104_mongo_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from pymongo.asynchronous.database import AsyncDatabase
-
- from contracts.nfs.c104 import C104DTO
- from scraper2.app.ports.sinks.c104_sink_port import C104SinkPort
-
- from db2.nfs import (
-     upsert_latest,
-     upsert_latest_many,
-     insert_snapshot,
-     insert_snapshots_many,
- )
-
- _ENDPOINT = "c104"
-
-
- class MongoC104Sink(C104SinkPort):
-     def __init__(self, db: AsyncDatabase):
-         self._db = db
-
-     async def write(self, dto: C104DTO, *, asof: Optional[datetime] = None) -> None:
-         await upsert_latest(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-         await insert_snapshot(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C104DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         dtos_list = list(dtos)
-         if not dtos_list:
-             return
-
-         await upsert_latest_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
-         await insert_snapshots_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
--- scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py
+++ /dev/null
@@ -1,41 +0,0 @@
- # scraper2/adapters/out/sinks/mongo/c106_mongo_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from pymongo.asynchronous.database import AsyncDatabase
-
- from contracts.nfs.c106 import C106DTO
- from scraper2.app.ports.sinks.c106_sink_port import C106SinkPort
-
- from db2.nfs import (
-     upsert_latest,
-     upsert_latest_many,
-     insert_snapshot,
-     insert_snapshots_many,
- )
-
- _ENDPOINT = "c106"
-
-
- class MongoC106Sink(C106SinkPort):
-     def __init__(self, db: AsyncDatabase):
-         self._db = db
-
-     async def write(self, dto: C106DTO, *, asof: Optional[datetime] = None) -> None:
-         await upsert_latest(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-         await insert_snapshot(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C106DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         dtos_list = list(dtos)
-         if not dtos_list:
-             return
-
-         await upsert_latest_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
-         await insert_snapshots_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
--- scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py
+++ /dev/null
@@ -1,41 +0,0 @@
- # scraper2/adapters/out/sinks/mongo/c108_mongo_sink.py
- from __future__ import annotations
-
- from datetime import datetime
- from typing import Iterable, Optional
-
- from pymongo.asynchronous.database import AsyncDatabase
-
- from contracts.nfs.c108 import C108DTO
- from scraper2.app.ports.sinks.c108_sink_port import C108SinkPort
-
- from db2.nfs import (
-     upsert_latest,
-     upsert_latest_many,
-     insert_snapshot,
-     insert_snapshots_many,
- )
-
- _ENDPOINT = "c108"
-
-
- class MongoC108Sink(C108SinkPort):
-     def __init__(self, db: AsyncDatabase):
-         self._db = db
-
-     async def write(self, dto: C108DTO, *, asof: Optional[datetime] = None) -> None:
-         await upsert_latest(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-         await insert_snapshot(self._db, endpoint=_ENDPOINT, dto=dto, asof=asof)
-
-     async def write_many(
-         self,
-         dtos: Iterable[C108DTO],
-         *,
-         asof: Optional[datetime] = None,
-     ) -> None:
-         dtos_list = list(dtos)
-         if not dtos_list:
-             return
-
-         await upsert_latest_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
-         await insert_snapshots_many(self._db, endpoint=_ENDPOINT, dtos=dtos_list, asof=asof)
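Each deleted Mongo sink implements the same dual-write policy: upsert the current document into a "latest" view, then append an immutable snapshot for history, both through the `db2.nfs` helpers. As with the memory sinks, only the endpoint string and DTO type vary, so the consolidated `mongo_sink.py` plausibly parameterizes them. A hypothetical sketch of that consolidation (the new file's actual source is not in this diff):

```python
from __future__ import annotations

from datetime import datetime
from typing import Generic, Iterable, Optional, TypeVar

from pymongo.asynchronous.database import AsyncDatabase

# Same helpers the deleted sinks imported.
from db2.nfs import (
    insert_snapshot,
    insert_snapshots_many,
    upsert_latest,
    upsert_latest_many,
)

DTO = TypeVar("DTO")


class MongoSink(Generic[DTO]):
    """Hypothetical generic form of the five deleted Mongo sinks: upsert the
    latest state, append a history snapshot, endpoint passed at construction."""

    def __init__(self, db: AsyncDatabase, endpoint: str) -> None:
        self._db = db
        self._endpoint = endpoint

    async def write(self, dto: DTO, *, asof: Optional[datetime] = None) -> None:
        await upsert_latest(self._db, endpoint=self._endpoint, dto=dto, asof=asof)
        await insert_snapshot(self._db, endpoint=self._endpoint, dto=dto, asof=asof)

    async def write_many(
        self, dtos: Iterable[DTO], *, asof: Optional[datetime] = None
    ) -> None:
        batch = list(dtos)
        if not batch:
            return
        await upsert_latest_many(self._db, endpoint=self._endpoint, dtos=batch, asof=asof)
        await insert_snapshots_many(self._db, endpoint=self._endpoint, dtos=batch, asof=asof)
```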